From 63f1b69b346c033bb14898a29d583ca107adb41b Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 10 Sep 2025 15:26:44 +0200 Subject: [PATCH 01/62] Implement rudimentary upload of datasets to S3 --- app/controllers/DatasetController.scala | 15 +- .../WKRemoteDataStoreController.scala | 10 +- conf/webknossos.latest.routes | 3 +- .../datastore/DataStoreConfig.scala | 7 + .../controllers/DataSourceController.scala | 12 + .../datastore/datavault/Encoding.scala | 13 +- .../helpers/DirectoryConstants.scala | 1 + .../models/datasource/DataLayer.scala | 33 +- .../services/DSRemoteWebknossosClient.scala | 19 ++ .../services/DataSourceService.scala | 48 +++ .../services/uploading/UploadService.scala | 284 +++++++++++++++++- 11 files changed, 403 insertions(+), 42 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index c697cabceae..8d9c7c51ca8 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -7,20 +7,17 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, TristateOptionJsonHelper} import com.scalableminds.webknossos.datastore.models.AdditionalCoordinate -import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, ElementClass} +import com.scalableminds.webknossos.datastore.models.datasource.ElementClass import mail.{MailchimpClient, MailchimpTag} import models.analytics.{AnalyticsService, ChangeDatasetSettingsEvent, OpenDatasetEvent} import models.dataset._ -import models.dataset.explore.{ - ExploreAndAddRemoteDatasetParameters, - WKExploreRemoteLayerParameters, - WKExploreRemoteLayerService -} +import models.dataset.explore.{ExploreAndAddRemoteDatasetParameters, WKExploreRemoteLayerParameters, WKExploreRemoteLayerService} import models.folder.FolderService import models.organization.OrganizationDAO import models.team.{TeamDAO, TeamService} import models.user.{User, UserDAO, UserService} import com.scalableminds.util.tools.{Empty, Failure, Full} +import com.scalableminds.webknossos.datastore.services.DataSourceRegistrationInfo import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.functional.syntax._ import play.api.libs.json._ @@ -71,12 +68,6 @@ object SegmentAnythingMaskParameters { implicit val jsonFormat: Format[SegmentAnythingMaskParameters] = Json.format[SegmentAnythingMaskParameters] } -case class DataSourceRegistrationInfo(dataSource: DataSource, folderId: Option[String], dataStoreName: String) - -object DataSourceRegistrationInfo { - implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo] -} - class DatasetController @Inject()(userService: UserService, userDAO: UserDAO, datasetService: DatasetService, diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 68a6d6d51fb..000f7b5a6e6 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -9,7 +9,7 @@ import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLink import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.{AbstractDataLayer, DataSource, DataSourceId} import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSourceLike => InboxDataSource} -import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, 
DataStoreStatus} +import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataSourceRegistrationInfo, DataStoreStatus} import com.scalableminds.webknossos.datastore.services.uploading.{ LinkedLayerIdentifier, ReserveAdditionalInformation, @@ -317,6 +317,14 @@ class WKRemoteDataStoreController @Inject()( "organization.notFound", organizationId) ~> NOT_FOUND _ <- Fox.fromBool(organization._id == user._organization) ?~> "notAllowed" ~> FORBIDDEN + existingDatasetOpt <- Fox.fromFuture( + datasetDAO + .findOneByDirectoryNameAndOrganization(directoryName, organization._id)(GlobalAccessContext) + .toFutureOption) + // Uploading creates an unusable dataset first, here we delete it if it exists. + _ <- existingDatasetOpt + .map(existingDataset => datasetDAO.deleteDataset(existingDataset._id, onlyMarkAsDeleted = false)) + .getOrElse(Fox.successful(())) dataset <- datasetService.createVirtualDataset( directoryName, dataStore, diff --git a/conf/webknossos.latest.routes b/conf/webknossos.latest.routes index b5c98c36e83..8f1c711d00a 100644 --- a/conf/webknossos.latest.routes +++ b/conf/webknossos.latest.routes @@ -124,6 +124,7 @@ PUT /datastores/:name/datasources/paths GET /datastores/:name/datasources/:datasetId/paths controllers.WKRemoteDataStoreController.getPaths(name: String, key: String, datasetId: ObjectId) GET /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.getDataSource(name: String, key: String, datasetId: ObjectId) PUT /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.updateDataSource(name: String, key: String, datasetId: ObjectId, allowNewPaths: Boolean) +POST /datastores/:name/datasources/:organizationId/:directoryName controllers.WKRemoteDataStoreController.registerDataSource(name: String, key: String, organizationId: String, directoryName: String, token: String) PATCH /datastores/:name/status controllers.WKRemoteDataStoreController.statusUpdate(name: String, key: String) POST /datastores/:name/reserveUpload controllers.WKRemoteDataStoreController.reserveDatasetUpload(name: String, key: String, token: String) GET /datastores/:name/getUnfinishedUploadsForUser controllers.WKRemoteDataStoreController.getUnfinishedUploadsForUser(name: String, key: String, token: String, organizationName: String) @@ -287,7 +288,7 @@ GET /jobs/:id/export # AI Models POST /aiModels/runNeuronModelTraining controllers.AiModelController.runNeuronTraining -POST /aiModels/runInstanceModelTraining controllers.AiModelController.runInstanceTraining +POST /aiModels/runInstanceModelTraining controllers.AiModelController.runInstanceTraining POST /aiModels/inferences/runCustomNeuronModelInference controllers.AiModelController.runCustomNeuronInference POST /aiModels/inferences/runCustomInstanceModelInference controllers.AiModelController.runCustomInstanceModelInference GET /aiModels/inferences/:id controllers.AiModelController.readAiInferenceInfo(id: ObjectId) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala index 854ae1629c6..fa704074507 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala @@ -60,6 +60,13 @@ class DataStoreConfig @Inject()(configuration: Configuration) extends ConfigRead object DataVaults { val credentials: List[Config] = 
getList[Config]("datastore.dataVaults.credentials") } + object S3Upload { + val enabled: Boolean = get[Boolean]("datastore.s3Upload.enabled") + val endpoint: String = get[String]("datastore.s3Upload.endpoint") + val bucketName: String = get[String]("datastore.s3Upload.bucketName") + val objectKeyPrefix: String = get[String]("datastore.s3Upload.objectKeyPrefix") + val credentialName: String = get[String]("datastore.s3Upload.credentialName") + } val children = List(WebKnossos, WatchFileSystem, Cache, AdHocMesh, Redis, AgglomerateSkeleton) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 08fadfef32f..30e0da1e4b3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -7,6 +7,7 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Box, Empty, Failure, Fox, FoxImplicits, Full} import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong +import com.scalableminds.webknossos.datastore.datavault.S3DataVault import com.scalableminds.webknossos.datastore.explore.{ ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, @@ -196,6 +197,17 @@ class DataSourceController @Inject()( totalChunkCount, chunkNumber, new File(chunkFile.ref.path.toString)) + + /*_ <- uploadService.handleUploadChunkAws( + uploadFileId, + chunkSize, + currentChunkSize, + totalChunkCount, + chunkNumber, + new File(chunkFile.ref.path.toString), + "webknossos-test", + s"upload-tests/upload-test-${uploadFileId}", + )*/ } yield Ok } } yield result diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala index 84157e7d413..ac7b1f98f21 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala @@ -12,11 +12,12 @@ object Encoding extends ExtendedEnumeration { // List of possible entries: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding def fromRfc7231String(s: String): Box[Encoding] = s match { - case "gzip" => Full(gzip) - case "x-gzip" => Full(gzip) - case "br" => Full(brotli) - case "identity" => Full(identity) - case "" => Full(identity) - case _ => Failure(s"Unsupported encoding: $s") + case "gzip" => Full(gzip) + case "x-gzip" => Full(gzip) + case "br" => Full(brotli) + case "identity" => Full(identity) + case "" => Full(identity) + case "aws-chunked" => Full(identity) // TODO: Does this work? 
+ case _ => Failure(s"Unsupported encoding: $s") } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala index bacbabe09da..0f9f9366023 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala @@ -4,4 +4,5 @@ trait DirectoryConstants { val forConversionDir = ".forConversion" val trashDir = ".trash" val uploadingDir: String = ".uploading" + val uploadToS3Dir = ".cloud" } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala index 90a5c8eb15b..4f18aedd2eb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala @@ -412,7 +412,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfigurationMapping: Option[LayerViewConfiguration] => Option[LayerViewConfiguration] = l => l, magMapping: MagLocator => MagLocator = m => m, name: String = this.name, - coordinateTransformations: Option[List[CoordinateTransformation]] = this.coordinateTransformations) + coordinateTransformations: Option[List[CoordinateTransformation]] = this.coordinateTransformations, + attachmentMapping: DatasetLayerAttachments => DatasetLayerAttachments = a => a) : DataLayerWithMagLocators = this match { case l: ZarrDataLayer => @@ -421,7 +422,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: ZarrSegmentationLayer => l.copy( @@ -429,7 +431,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: N5DataLayer => l.copy( @@ -437,7 +440,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: N5SegmentationLayer => l.copy( @@ -445,7 +449,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: PrecomputedDataLayer => l.copy( @@ -453,7 +458,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = 
l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: PrecomputedSegmentationLayer => l.copy( @@ -461,7 +467,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: Zarr3DataLayer => l.copy( @@ -469,7 +476,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: Zarr3SegmentationLayer => l.copy( @@ -477,7 +485,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: WKWDataLayer => l.copy( @@ -485,7 +494,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case l: WKWSegmentationLayer => l.copy( @@ -493,7 +503,8 @@ trait DataLayerWithMagLocators extends DataLayer { defaultViewConfiguration = defaultViewConfigurationMapping(l.defaultViewConfiguration), mags = l.mags.map(magMapping), name = name, - coordinateTransformations = coordinateTransformations + coordinateTransformations = coordinateTransformations, + attachments = l.attachments.map(attachmentMapping) ) case _ => throw new Exception("Encountered unsupported layer format") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala index cddd7687393..466627c3c9f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala @@ -50,6 +50,12 @@ object MagPathInfo { implicit val jsonFormat: OFormat[MagPathInfo] = Json.format[MagPathInfo] } +case class DataSourceRegistrationInfo(dataSource: DataSource, folderId: Option[String], dataStoreName: String) + +object DataSourceRegistrationInfo { + implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo] +} + trait RemoteWebknossosClient { def requestUserAccess(accessRequest: UserAccessRequest)(implicit tc: TokenContext): Fox[UserAccessAnswer] } @@ -144,6 +150,19 @@ class DSRemoteWebknossosClient @Inject()( .withTokenFromContext .putJson(dataSource) + def registerDataSource(dataSource: DataSource, dataSourceId: DataSourceId, folderId: Option[String])( + implicit tc: 
TokenContext): Fox[ObjectId] = + for { + _ <- Fox.successful(()) + info = DataSourceRegistrationInfo(dataSource, folderId, dataStoreName) + response <- rpc( + s"$webknossosUri/api/datastores/$dataStoreName/datasources/${dataSourceId.organizationId}/${dataSourceId.directoryName}") + .addQueryString("key" -> dataStoreKey) + .withTokenFromContext + .postJson[DataSourceRegistrationInfo](info) + datasetId <- ObjectId.fromString(response.body) + } yield datasetId + def deleteDataSource(id: DataSourceId): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/deleteDataset") .addQueryString("key" -> dataStoreKey) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index 5e8568dc26e..c4b6c958209 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -365,6 +365,54 @@ class DataSourceService @Inject()( } } + // Replace relative paths with absolute paths + // TODO: Rename method + def replacePaths(dataSource: InboxDataSource, newBasePath: String): Fox[DataSource] = { + val replaceUri = (uri: URI) => { + val isRelativeFilePath = (uri.getScheme == null || uri.getScheme.isEmpty || uri.getScheme == DataVaultService.schemeFile) && !uri.isAbsolute + uri.getPath match { + // TODO: Does this make sense? + case pathStr if isRelativeFilePath => + new URI(uri.getScheme, + uri.getUserInfo, + uri.getHost, + uri.getPort, + newBasePath + pathStr, + uri.getQuery, + uri.getFragment) + case _ => uri + } + } + + dataSource.toUsable match { + case Some(usableDataSource) => + val updatedDataLayers = usableDataSource.dataLayers.map { + case layerWithMagLocators: DataLayerWithMagLocators => + layerWithMagLocators.mapped( + identity, + identity, + mag => + mag.path match { + case Some(pathStr) => mag.copy(path = Some(replaceUri(new URI(pathStr)).toString)) + case _ => mag + }, + attachmentMapping = attachment => + DatasetLayerAttachments( + attachment.meshes.map(a => a.copy(path = replaceUri(a.path))), + attachment.agglomerates.map(a => a.copy(path = replaceUri(a.path))), + attachment.segmentIndex.map(a => a.copy(path = replaceUri(a.path))), + attachment.connectomes.map(a => a.copy(path = replaceUri(a.path))), + attachment.cumsum.map(a => a.copy(path = replaceUri(a.path))) + ) + ) + case layer => layer + } + Fox.successful(usableDataSource.copy(dataLayers = updatedDataLayers)) + case None => + Fox.failure("Cannot replace paths of unusable datasource") + } + } + private def scanForAttachedFiles(dataSourcePath: Path, dataSource: DataSource) = dataSource.dataLayers.map(dataLayer => { val dataLayerPath = dataSourcePath.resolve(dataLayer.name) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 1432170256c..ee6c0b66f70 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -1,11 +1,13 @@ package com.scalableminds.webknossos.datastore.services.uploading import com.google.inject.Inject +import com.scalableminds.util.accesscontext.TokenContext import 
com.scalableminds.util.io.PathUtils.ensureDirectoryBox import com.scalableminds.util.io.{PathUtils, ZipIO} import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools._ +import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.layers._ import com.scalableminds.webknossos.datastore.dataformats.wkw.WKWDataFormatHelper import com.scalableminds.webknossos.datastore.datareaders.n5.N5Header.FILENAME_ATTRIBUTES_JSON @@ -19,14 +21,36 @@ import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.GenericDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.services.{DSRemoteWebknossosClient, DataSourceService} -import com.scalableminds.webknossos.datastore.storage.{DataStoreRedisStore, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.{ + CredentialConfigReader, + DataStoreRedisStore, + RemoteSourceDescriptorService, + S3AccessKeyCredential +} import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils import play.api.libs.json.{Json, OFormat, Reads} +import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} +import software.amazon.awssdk.core.async.AsyncRequestBody +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3AsyncClient +import software.amazon.awssdk.services.s3.model.{ + CompleteMultipartUploadRequest, + CompletedMultipartUpload, + CompletedPart, + CreateMultipartUploadRequest, + PutObjectRequest, + UploadPartRequest +} import java.io.{File, RandomAccessFile} +import java.net.URI +import java.util import java.nio.file.{Files, Path} +import java.util.stream.{Collectors, StreamSupport} import scala.concurrent.{ExecutionContext, Future} +import scala.jdk.CollectionConverters.IterableHasAsJava +import scala.jdk.FutureConverters._ case class ReserveUploadInformation( uploadId: String, // upload id that was also used in chunk upload (this time without file paths) @@ -110,6 +134,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, remoteSourceDescriptorService: RemoteSourceDescriptorService, exploreLocalLayerService: ExploreLocalLayerService, datasetSymlinkService: DatasetSymlinkService, + dataStoreConfig: DataStoreConfig, val remoteWebknossosClient: DSRemoteWebknossosClient)(implicit ec: ExecutionContext) extends DatasetDeleter with DirectoryConstants @@ -146,6 +171,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"upload___${Json.stringify(Json.toJson(datasourceId))}___datasourceId" private def redisKeyForFilePaths(uploadId: String): String = s"upload___${uploadId}___filePaths" + private def redisKeyForS3MultipartUploadId(uploadId: String, fileName: String): String = + s"upload___${uploadId}___file___${fileName}___s3MultipartUploadId" + private def redisKeyForS3PartETag(uploadId: String, fileName: String, partNumber: Long): String = + s"upload___${uploadId}___file___${fileName}___partETag___$partNumber" cleanUpOrphanUploads() @@ -161,6 +190,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def uploadDirectory(organizationId: String, uploadId: String): Path = dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadId) + private def 
s3UploadDirectory(organizationId: String, uploadId: String): Path = + dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadToS3Dir).resolve(uploadId) + def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) @@ -306,6 +338,169 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else Fox.successful(()) } + lazy val s3UploadCredentialsOpt: Option[(String, String)] = dataStoreConfig.Datastore.DataVaults.credentials.flatMap { + credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + }.map { + case S3AccessKeyCredential(name, accessKeyId, secretAccessKey, _, _) => + (name, accessKeyId, secretAccessKey) + case _ => ("INVALID", "", "") // TODO: This is not very nice. + // TODO: Does it make sense to reuse the DataVault global credential here? + }.filter(c => dataStoreConfig.Datastore.S3Upload.credentialName == c._1).map(c => (c._2, c._3)).headOption + + private lazy val s3Client: S3AsyncClient = S3AsyncClient + .builder() + .credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.builder + .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) + .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) + .build() + )) + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) + .region(Region.US_EAST_1) + .build() + + def handleUploadChunkAws( + uploadFileId: String, + chunkSize: Long, + currentChunkSize: Long, + totalChunkCount: Long, + currentChunkNumber: Long, + chunkFile: File, + bucketName: String, + objectKey: String + ): Fox[Unit] = { + val uploadId = extractDatasetUploadId(uploadFileId) + + def getAllPartETags(uploadId: String, filePath: String, totalChunkCount: Long): Fox[Vector[(Int, String)]] = + for { + possibleEtags <- Fox.combined( + (1L to totalChunkCount).map(i => + runningUploadMetadataStore.find(redisKeyForS3PartETag(uploadId, filePath, i))) + ) + etagsWithIndex = possibleEtags.zipWithIndex + foundEtags = etagsWithIndex.collect { + case (Some(etag), idx) => (idx + 1, etag) // partNumber starts at 1 + } + } yield foundEtags.toVector + + for { + dataSourceId <- getDataSourceIdByUploadId(uploadId) + (filePath, uploadDir) <- getFilePathAndDirOfUploadId(uploadFileId) + + isFileKnown <- runningUploadMetadataStore.contains(redisKeyForFileChunkCount(uploadId, filePath)) + totalFileSizeInBytesOpt <- runningUploadMetadataStore.findLong(redisKeyForTotalFileSizeInBytes(uploadId)) + + _ <- Fox.runOptional(totalFileSizeInBytesOpt) { maxFileSize => + runningUploadMetadataStore + .increaseBy(redisKeyForCurrentUploadedTotalFileSizeInBytes(uploadId), currentChunkSize) + .flatMap(newTotalFileSizeInBytesOpt => { + if (newTotalFileSizeInBytesOpt.getOrElse(0L) > maxFileSize) { + cleanUpDatasetExceedingSize(uploadDir, uploadId).flatMap(_ => + Fox.failure("dataset.upload.moreBytesThanReserved")) + } else Fox.successful(()) + }) + } + + // Initialize multipart upload on first chunk + _ <- Fox.runIf(!isFileKnown) { + for { + _ <- runningUploadMetadataStore.insertIntoSet(redisKeyForFileNameSet(uploadId), filePath) + _ <- runningUploadMetadataStore.insert( + redisKeyForFileChunkCount(uploadId, filePath), + String.valueOf(totalChunkCount) + ) + // Start multipart upload + createResp <- Fox.fromFuture { + s3Client + .createMultipartUpload( + CreateMultipartUploadRequest.builder().bucket(bucketName).key(objectKey).build() + ) + .asScala + } + _ <- 
runningUploadMetadataStore.insert( + redisKeyForS3MultipartUploadId(uploadId, filePath), + createResp.uploadId() + ) + } yield () + } + + isNewChunk <- runningUploadMetadataStore.insertIntoSet( + redisKeyForFileChunkSet(uploadId, filePath), + String.valueOf(currentChunkNumber) + ) + + } yield { + if (isNewChunk) { + try { + val bytes = Files.readAllBytes(chunkFile.toPath) + for { + s3UploadIdOpt <- runningUploadMetadataStore.find(redisKeyForS3MultipartUploadId(uploadId, filePath)) + s3UploadId <- s3UploadIdOpt.toFox ?~> s"No multipart uploadId found for $filePath" + + // Upload part to S3 + uploadResp <- Fox.fromFuture { + s3Client + .uploadPart( + UploadPartRequest + .builder() + .bucket(bucketName) + .key(objectKey) + .uploadId(s3UploadId) + .partNumber(currentChunkNumber.toInt) + .contentLength(currentChunkSize) + .build(), + software.amazon.awssdk.core.async.AsyncRequestBody.fromBytes(bytes) + ) + .asScala + } + + // Store ETag for later completion + _ <- runningUploadMetadataStore.insert( + redisKeyForS3PartETag(uploadId, filePath, currentChunkNumber), + uploadResp.eTag() + ) + + // Complete multipart upload if all chunks uploaded + _ <- Fox.runIf(currentChunkNumber == totalChunkCount) { + for { + eTags <- getAllPartETags(uploadId, filePath, totalChunkCount) + completedParts: util.List[CompletedPart] = StreamSupport + .stream(eTags.map { + case (partNum, etag) => + CompletedPart.builder().partNumber(partNum).eTag(etag).build() + }.asJava.spliterator(), false) + .collect(Collectors.toList()) + completeReq = CompleteMultipartUploadRequest + .builder() + .bucket(bucketName) + .key(objectKey) + .uploadId(s3UploadId) + .multipartUpload( + CompletedMultipartUpload.builder().parts(completedParts).build() + ) + .build() + _ <- Fox.fromFuture(s3Client.completeMultipartUpload(completeReq).asScala) + } yield () + } + } yield Fox.successful(()) + + } catch { + case e: Exception => + runningUploadMetadataStore.removeFromSet(redisKeyForFileChunkSet(uploadId, filePath), + String.valueOf(currentChunkNumber)) + val errorMsg = + s"Error receiving chunk $currentChunkNumber for upload ${dataSourceId.directoryName}: ${e.getMessage}" + logger.warn(errorMsg) + Fox.failure(errorMsg) + } + } else Fox.successful(()) + } + } + def cancelUpload(cancelUploadInformation: CancelUploadInformation): Fox[Unit] = { val uploadId = cancelUploadInformation.uploadId for { @@ -324,14 +519,16 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } } - def finishUpload(uploadInformation: UploadInformation, checkCompletion: Boolean = true): Fox[(DataSourceId, Long)] = { + def finishUpload(uploadInformation: UploadInformation, checkCompletion: Boolean = true)( + implicit tc: TokenContext): Fox[(DataSourceId, Long)] = { val uploadId = uploadInformation.uploadId for { dataSourceId <- getDataSourceIdByUploadId(uploadId) datasetNeedsConversion = uploadInformation.needsConversion.getOrElse(false) uploadDir = uploadDirectory(dataSourceId.organizationId, uploadId) - unpackToDir = dataSourceDirFor(dataSourceId, datasetNeedsConversion) + uploadToS3 = dataStoreConfig.Datastore.S3Upload.enabled + totalFileSizeInBytesOpt <- runningUploadMetadataStore.find(redisKeyForTotalFileSizeInBytes(uploadId)) _ <- Fox.runOptional(totalFileSizeInBytesOpt) { maxFileSize => tryo(FileUtils.sizeOfDirectoryAsBigInteger(uploadDir.toFile).longValue).toFox.map(actualFileSize => @@ -344,6 +541,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ = logger.info( s"Finishing dataset upload of 
${dataSourceId.organizationId}/${dataSourceId.directoryName} with id $uploadId...") _ <- Fox.runIf(checkCompletion)(ensureAllChunksUploaded(uploadId)) + + unpackToDir = dataSourceDirFor(dataSourceId, datasetNeedsConversion, uploadToS3) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox linkedLayerInfo <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) @@ -356,13 +555,36 @@ class UploadService @Inject()(dataSourceService: DataSourceService, unpackToDir, dataSourceId, linkedLayerInfo.layersToLink).shiftBox + // Post-processing needs to be handled differently for s3 uploads? _ <- cleanUpOnFailure(postProcessingResult, dataSourceId, datasetNeedsConversion, label = s"processing dataset at $unpackToDir") dataSource = dataSourceService.dataSourceFromDir(unpackToDir, dataSourceId.organizationId) - _ <- remoteWebknossosClient.reportDataSource(dataSource) - datasetSizeBytes <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox + datasetSizeBytes: Long <- if (uploadToS3) { + for { + _ <- Fox.successful(()) + + s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${uploadId}/" + _ <- uploadDirectoryToS3(unpackToDir, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) + endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost + s3DataSource <- dataSourceService.replacePaths( + dataSource, + newBasePath = s"s3://$endPointHost/${dataStoreConfig.Datastore.S3Upload.bucketName}/$s3ObjectKey") + // TODO: Handle folder id + _ <- remoteWebknossosClient.registerDataSource(s3DataSource, dataSourceId, None) + // TODO: Is uploaded dataset size the same as local dataset size? 
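+            // Annotation: the reported size is measured on the local unpacked directory before it
+            // is deleted below; the same files are what gets pushed to S3, so the two should match.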
+ datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox + _ = this.synchronized { + PathUtils.deleteDirectoryRecursively(unpackToDir) + } + } yield datasetSize + } else { + for { + _ <- remoteWebknossosClient.reportDataSource(dataSource) + datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox + } yield datasetSize + } } yield (dataSourceId, datasetSizeBytes) } @@ -438,6 +660,39 @@ class UploadService @Inject()(dataSourceService: DataSourceService, exploreLocalLayerService.writeLocalDatasourceProperties(dataSource, path)) } yield path + private def uploadDirectoryToS3( + dataDir: Path, + bucketName: String, + prefix: String + ): Fox[Unit] = + for { + files <- PathUtils.listFilesRecursive(dataDir, silent = false, maxDepth = 20).toFox + uploadFoxes = files.map(filePath => { + val relPath = dataDir.relativize(filePath).toString.replace("\\", "/") + val s3Key = s"$prefix$relPath" + + // TODO: For large files, consider using multipart upload + logger.info("Uploading file to S3: " + filePath) + val bytes = Files.readAllBytes(filePath) + logger.info(s"Dataset Upload: Uploading ${bytes.length} bytes to s3://$bucketName/$s3Key") + val startTime = System.currentTimeMillis() + logger.info(s"Starting upload of $filePath to S3 at $startTime") + + for { + _ <- Fox.fromFuture { + s3Client + .putObject( + PutObjectRequest.builder().bucket(bucketName).key(s3Key).build(), + AsyncRequestBody.fromBytes(bytes) + ) + .asScala + } ?~> s"Failed to upload file $filePath to S3" + } yield () + }) + _ <- Fox.combined(uploadFoxes.toList) + _ = logger.info(s"Finished uploading directory to S3 at ${System.currentTimeMillis()}") + } yield () + private def cleanUpOnFailure[T](result: Box[T], dataSourceId: DataSourceId, datasetNeedsConversion: Boolean, @@ -483,12 +738,19 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- Fox.fromBool(list.forall(identity)) } yield () - private def dataSourceDirFor(dataSourceId: DataSourceId, datasetNeedsConversion: Boolean): Path = { - val dataSourceDir = - if (datasetNeedsConversion) - dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) - else - dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + private def dataSourceDirFor(dataSourceId: DataSourceId, + datasetNeedsConversion: Boolean, + uploadToS3: Boolean): Path = { + val dataSourceDir = { + if (uploadToS3) + s3UploadDirectory(dataSourceId.organizationId, dataSourceId.directoryName) + else { + if (datasetNeedsConversion) + dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) + else + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + } + } dataSourceDir } From f2dcd816cc507d204102d0ab9140a3e11f265282 Mon Sep 17 00:00:00 2001 From: frcroth Date: Thu, 11 Sep 2025 11:47:15 +0200 Subject: [PATCH 02/62] Fix that uploads would have 'aws-chunked' content encoding in files --- .../webknossos/datastore/datavault/Encoding.scala | 13 ++++++------- .../services/uploading/UploadService.scala | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala index ac7b1f98f21..84157e7d413 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/Encoding.scala @@ -12,12 +12,11 @@ object Encoding extends ExtendedEnumeration { // List of possible entries: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding def fromRfc7231String(s: String): Box[Encoding] = s match { - case "gzip" => Full(gzip) - case "x-gzip" => Full(gzip) - case "br" => Full(brotli) - case "identity" => Full(identity) - case "" => Full(identity) - case "aws-chunked" => Full(identity) // TODO: Does this work? - case _ => Failure(s"Unsupported encoding: $s") + case "gzip" => Full(gzip) + case "x-gzip" => Full(gzip) + case "br" => Full(brotli) + case "identity" => Full(identity) + case "" => Full(identity) + case _ => Failure(s"Unsupported encoding: $s") } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index ee6c0b66f70..21799fbb20b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -32,6 +32,7 @@ import org.apache.commons.io.FileUtils import play.api.libs.json.{Json, OFormat, Reads} import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} import software.amazon.awssdk.core.async.AsyncRequestBody +import software.amazon.awssdk.core.checksums.RequestChecksumCalculation import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.S3AsyncClient import software.amazon.awssdk.services.s3.model.{ @@ -361,6 +362,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, .forcePathStyle(true) .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) .region(Region.US_EAST_1) + .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) .build() def handleUploadChunkAws( From 6e62928ca4b60d14013a8241359e47098a7e03e3 Mon Sep 17 00:00:00 2001 From: frcroth Date: Thu, 11 Sep 2025 15:37:29 +0200 Subject: [PATCH 03/62] Use update data source route for uploading virtual datasets --- app/controllers/DatasetController.scala | 6 ++- .../WKRemoteDataStoreController.scala | 40 ++----------------- app/models/dataset/DatasetService.scala | 5 ++- conf/webknossos.latest.routes | 1 - .../controllers/DataSourceController.scala | 23 ++++------- .../models/datasource/DataLayer.scala | 3 +- .../services/DSRemoteWebknossosClient.scala | 13 ------ .../services/uploading/UploadService.scala | 40 ++++++++++++------- 8 files changed, 46 insertions(+), 85 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index 8d9c7c51ca8..393470c9c42 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -11,7 +11,11 @@ import com.scalableminds.webknossos.datastore.models.datasource.ElementClass import mail.{MailchimpClient, MailchimpTag} import models.analytics.{AnalyticsService, ChangeDatasetSettingsEvent, OpenDatasetEvent} import models.dataset._ -import models.dataset.explore.{ExploreAndAddRemoteDatasetParameters, WKExploreRemoteLayerParameters, WKExploreRemoteLayerService} +import models.dataset.explore.{ + ExploreAndAddRemoteDatasetParameters, + 
WKExploreRemoteLayerParameters, + WKExploreRemoteLayerService +} import models.folder.FolderService import models.organization.OrganizationDAO import models.team.{TeamDAO, TeamService} diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 000f7b5a6e6..5ed1f02c3fd 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -9,7 +9,7 @@ import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLink import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.{AbstractDataLayer, DataSource, DataSourceId} import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSourceLike => InboxDataSource} -import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataSourceRegistrationInfo, DataStoreStatus} +import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus} import com.scalableminds.webknossos.datastore.services.uploading.{ LinkedLayerIdentifier, ReserveAdditionalInformation, @@ -89,7 +89,8 @@ class WKRemoteDataStoreController @Inject()( uploadInfo.name, uploadInfo.organization, dataStore, - uploadInfo.requireUniqueName.getOrElse(false)) ?~> "dataset.upload.creation.failed" + uploadInfo.requireUniqueName.getOrElse(false), + uploadInfo.isVirtual.getOrElse(false)) ?~> "dataset.upload.creation.failed" _ <- datasetDAO.updateFolder(dataset._id, folderId)(GlobalAccessContext) _ <- datasetService.addInitialTeams(dataset, uploadInfo.initialTeams, user)(AuthorizedAccessContext(user)) _ <- datasetService.addUploader(dataset, user._id)(AuthorizedAccessContext(user)) @@ -301,41 +302,6 @@ class WKRemoteDataStoreController @Inject()( } - // Register a datasource from the datastore as a dataset in the database. - // This is called when adding remote virtual datasets (that should only exist in the database) - // by the data store after exploration. - def registerDataSource(name: String, - key: String, - organizationId: String, - directoryName: String, - token: String): Action[DataSourceRegistrationInfo] = - Action.async(validateJson[DataSourceRegistrationInfo]) { implicit request => - dataStoreService.validateAccess(name, key) { dataStore => - for { - user <- bearerTokenService.userForToken(token) - organization <- organizationDAO.findOne(organizationId)(GlobalAccessContext) ?~> Messages( - "organization.notFound", - organizationId) ~> NOT_FOUND - _ <- Fox.fromBool(organization._id == user._organization) ?~> "notAllowed" ~> FORBIDDEN - existingDatasetOpt <- Fox.fromFuture( - datasetDAO - .findOneByDirectoryNameAndOrganization(directoryName, organization._id)(GlobalAccessContext) - .toFutureOption) - // Uploading creates an unusable dataset first, here we delete it if it exists. 
- _ <- existingDatasetOpt - .map(existingDataset => datasetDAO.deleteDataset(existingDataset._id, onlyMarkAsDeleted = false)) - .getOrElse(Fox.successful(())) - dataset <- datasetService.createVirtualDataset( - directoryName, - dataStore, - request.body.dataSource, - request.body.folderId, - user - ) - } yield Ok(dataset._id.toString) - } - } - def updateDataSource(name: String, key: String, datasetId: ObjectId, allowNewPaths: Boolean): Action[DataSource] = Action.async(validateJson[DataSource]) { implicit request => dataStoreService.validateAccess(name, key) { _ => diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 274d2d6d062..ff55b49562a 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -88,7 +88,8 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, def createPreliminaryDataset(datasetName: String, organizationId: String, dataStore: DataStore, - requireUniqueName: Boolean): Fox[Dataset] = { + requireUniqueName: Boolean, + isVirtual: Boolean): Fox[Dataset] = { val newDatasetId = ObjectId.generate for { isDatasetNameAlreadyTaken <- datasetDAO.doesDatasetDirectoryExistInOrganization(datasetName, organizationId)( @@ -97,7 +98,7 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, datasetDirectoryName = if (isDatasetNameAlreadyTaken) s"$datasetName-${newDatasetId.toString}" else datasetName unreportedDatasource = UnusableDataSource(DataSourceId(datasetDirectoryName, organizationId), notYetUploadedStatus) - newDataset <- createDataset(dataStore, newDatasetId, datasetName, unreportedDatasource) + newDataset <- createDataset(dataStore, newDatasetId, datasetName, unreportedDatasource, isVirtual = isVirtual) } yield newDataset } diff --git a/conf/webknossos.latest.routes b/conf/webknossos.latest.routes index 8f1c711d00a..951edc501a8 100644 --- a/conf/webknossos.latest.routes +++ b/conf/webknossos.latest.routes @@ -124,7 +124,6 @@ PUT /datastores/:name/datasources/paths GET /datastores/:name/datasources/:datasetId/paths controllers.WKRemoteDataStoreController.getPaths(name: String, key: String, datasetId: ObjectId) GET /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.getDataSource(name: String, key: String, datasetId: ObjectId) PUT /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.updateDataSource(name: String, key: String, datasetId: ObjectId, allowNewPaths: Boolean) -POST /datastores/:name/datasources/:organizationId/:directoryName controllers.WKRemoteDataStoreController.registerDataSource(name: String, key: String, organizationId: String, directoryName: String, token: String) PATCH /datastores/:name/status controllers.WKRemoteDataStoreController.statusUpdate(name: String, key: String) POST /datastores/:name/reserveUpload controllers.WKRemoteDataStoreController.reserveDatasetUpload(name: String, key: String, token: String) GET /datastores/:name/getUnfinishedUploadsForUser controllers.WKRemoteDataStoreController.getUnfinishedUploadsForUser(name: String, key: String, token: String, organizationName: String) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 30e0da1e4b3..5aeb70c1bef 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -6,8 +6,8 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Box, Empty, Failure, Fox, FoxImplicits, Full} +import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong -import com.scalableminds.webknossos.datastore.datavault.S3DataVault import com.scalableminds.webknossos.datastore.explore.{ ExploreRemoteDatasetRequest, ExploreRemoteDatasetResponse, @@ -67,6 +67,7 @@ class DataSourceController @Inject()( exploreRemoteLayerService: ExploreRemoteLayerService, uploadService: UploadService, meshFileService: MeshFileService, + dataStoreConfig: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, val dsRemoteWebknossosClient: DSRemoteWebknossosClient, val dsRemoteTracingstoreClient: DSRemoteTracingstoreClient, @@ -102,10 +103,12 @@ class DataSourceController @Inject()( UserAccessRequest.administrateDataSources(request.body.organization)) { for { isKnownUpload <- uploadService.isKnownUpload(request.body.uploadId) + shouldBeVirtual = dataStoreConfig.Datastore.S3Upload.enabled + reserveUploadInformation = request.body.copy(isVirtual = Some(shouldBeVirtual)) _ <- if (!isKnownUpload) { - (dsRemoteWebknossosClient.reserveDataSourceUpload(request.body) ?~> "dataset.upload.validation.failed") + (dsRemoteWebknossosClient.reserveDataSourceUpload(reserveUploadInformation) ?~> "dataset.upload.validation.failed") .flatMap(reserveUploadAdditionalInfo => - uploadService.reserveUpload(request.body, reserveUploadAdditionalInfo)) + uploadService.reserveUpload(reserveUploadInformation, reserveUploadAdditionalInfo)) } else Fox.successful(()) } yield Ok } @@ -141,7 +144,8 @@ class DataSourceController @Inject()( None, request.body.initialTeamIds, request.body.folderId, - Some(request.body.requireUniqueName) + Some(request.body.requireUniqueName), + Some(false) ) ) ?~> "dataset.upload.validation.failed" } yield @@ -197,17 +201,6 @@ class DataSourceController @Inject()( totalChunkCount, chunkNumber, new File(chunkFile.ref.path.toString)) - - /*_ <- uploadService.handleUploadChunkAws( - uploadFileId, - chunkSize, - currentChunkSize, - totalChunkCount, - chunkNumber, - new File(chunkFile.ref.path.toString), - "webknossos-test", - s"upload-tests/upload-test-${uploadFileId}", - )*/ } yield Ok } } yield result diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala index 4f18aedd2eb..a2a0228ec5b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataLayer.scala @@ -413,8 +413,7 @@ trait DataLayerWithMagLocators extends DataLayer { magMapping: MagLocator => MagLocator = m => m, name: String = this.name, coordinateTransformations: Option[List[CoordinateTransformation]] = this.coordinateTransformations, - attachmentMapping: DatasetLayerAttachments => DatasetLayerAttachments = a => a) - : DataLayerWithMagLocators = + attachmentMapping: DatasetLayerAttachments => DatasetLayerAttachments = a => a): DataLayerWithMagLocators = this match { case l: ZarrDataLayer => l.copy( diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala index 466627c3c9f..f3214cbd080 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala @@ -150,19 +150,6 @@ class DSRemoteWebknossosClient @Inject()( .withTokenFromContext .putJson(dataSource) - def registerDataSource(dataSource: DataSource, dataSourceId: DataSourceId, folderId: Option[String])( - implicit tc: TokenContext): Fox[ObjectId] = - for { - _ <- Fox.successful(()) - info = DataSourceRegistrationInfo(dataSource, folderId, dataStoreName) - response <- rpc( - s"$webknossosUri/api/datastores/$dataStoreName/datasources/${dataSourceId.organizationId}/${dataSourceId.directoryName}") - .addQueryString("key" -> dataStoreKey) - .withTokenFromContext - .postJson[DataSourceRegistrationInfo](info) - datasetId <- ObjectId.fromString(response.body) - } yield datasetId - def deleteDataSource(id: DataSourceId): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/deleteDataset") .addQueryString("key" -> dataStoreKey) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 21799fbb20b..320484ecacd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -63,7 +63,8 @@ case class ReserveUploadInformation( layersToLink: Option[List[LinkedLayerIdentifier]], initialTeams: List[String], // team ids folderId: Option[String], - requireUniqueName: Option[Boolean]) + requireUniqueName: Option[Boolean], + isVirtual: Option[Boolean]) object ReserveUploadInformation { implicit val reserveUploadInformation: OFormat[ReserveUploadInformation] = Json.format[ReserveUploadInformation] } @@ -170,6 +171,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"upload___${uploadId}___file___${fileName}___chunkSet" private def redisKeyForUploadId(datasourceId: DataSourceId): String = s"upload___${Json.stringify(Json.toJson(datasourceId))}___datasourceId" + private def redisKeyForDatasetId(uploadId: String): String = + s"upload___${uploadId}___datasetId" private def redisKeyForFilePaths(uploadId: String): String = s"upload___${uploadId}___filePaths" private def redisKeyForS3MultipartUploadId(uploadId: String, fileName: String): String = @@ -197,6 +200,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) + def getDatasetIdByUploadId(uploadId: String): Fox[ObjectId] = + getObjectFromRedis[ObjectId](redisKeyForDatasetId(uploadId)) + def reserveUpload(reserveUploadInfo: ReserveUploadInformation, reserveUploadAdditionalInfo: ReserveAdditionalInformation): Fox[Unit] = for { @@ -217,6 +223,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, Json.stringify( Json.toJson(DataSourceId(reserveUploadAdditionalInfo.directoryName, reserveUploadInfo.organization))) ) + _ <- runningUploadMetadataStore.insert( + 
redisKeyForDatasetId(reserveUploadInfo.uploadId), + Json.stringify(Json.toJson(reserveUploadAdditionalInfo.newDatasetId)) + ) _ <- runningUploadMetadataStore.insert( redisKeyForUploadId(DataSourceId(reserveUploadAdditionalInfo.directoryName, reserveUploadInfo.organization)), reserveUploadInfo.uploadId @@ -339,16 +349,14 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else Fox.successful(()) } - lazy val s3UploadCredentialsOpt: Option[(String, String)] = dataStoreConfig.Datastore.DataVaults.credentials.flatMap { - credentialConfig => + private lazy val s3UploadCredentialsOpt: Option[(String, String)] = + dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => new CredentialConfigReader(credentialConfig).getCredential - }.map { - case S3AccessKeyCredential(name, accessKeyId, secretAccessKey, _, _) => - (name, accessKeyId, secretAccessKey) - case _ => ("INVALID", "", "") // TODO: This is not very nice. - // TODO: Does it make sense to reuse the DataVault global credential here? - }.filter(c => dataStoreConfig.Datastore.S3Upload.credentialName == c._1).map(c => (c._2, c._3)).headOption - + }.collectFirst { + case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) + if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => + (accessKeyId, secretAccessKey) + } private lazy val s3Client: S3AsyncClient = S3AsyncClient .builder() .credentialsProvider( @@ -362,6 +370,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, .forcePathStyle(true) .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) .build() @@ -548,6 +557,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox linkedLayerInfo <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) + datasetId <- getDatasetIdByUploadId(uploadId) _ <- cleanUpUploadedDataset(uploadDir, uploadId) _ <- cleanUpOnFailure(unpackResult, dataSourceId, @@ -567,14 +577,13 @@ class UploadService @Inject()(dataSourceService: DataSourceService, for { _ <- Fox.successful(()) - s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${uploadId}/" + s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" _ <- uploadDirectoryToS3(unpackToDir, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost s3DataSource <- dataSourceService.replacePaths( dataSource, newBasePath = s"s3://$endPointHost/${dataStoreConfig.Datastore.S3Upload.bucketName}/$s3ObjectKey") - // TODO: Handle folder id - _ <- remoteWebknossosClient.registerDataSource(s3DataSource, dataSourceId, None) + _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId, allowNewPaths = true) // TODO: Is uploaded dataset size the same as local dataset size? 
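          // Annotation: the dataset id is read back from Redis (getDatasetIdByUploadId) because the
          // dataset is now pre-created as virtual during reserveUpload; updateDataSource with
          // allowNewPaths = true replaces the removed registerDataSource round trip.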
datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox _ = this.synchronized { @@ -691,7 +700,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } ?~> s"Failed to upload file $filePath to S3" } yield () }) - _ <- Fox.combined(uploadFoxes.toList) + // TODO: Limit number of concurrent uploads? + _ <- Fox.combined(uploadFoxes) _ = logger.info(s"Finished uploading directory to S3 at ${System.currentTimeMillis()}") } yield () @@ -973,6 +983,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- runningUploadMetadataStore.remove(redisKeyForCurrentUploadedTotalFileSizeInBytes(uploadId)) dataSourceId <- getDataSourceIdByUploadId(uploadId) _ <- runningUploadMetadataStore.remove(redisKeyForDataSourceId(uploadId)) + _ <- runningUploadMetadataStore.remove(redisKeyForDatasetId(uploadId)) + // TODO: Remove S3 multipart upload if present _ <- runningUploadMetadataStore.remove(redisKeyForLinkedLayerIdentifier(uploadId)) _ <- runningUploadMetadataStore.remove(redisKeyForUploadId(dataSourceId)) _ <- runningUploadMetadataStore.remove(redisKeyForFilePaths(uploadId)) From f90002eeab7e21cfcbf951391e2dfae3b52ac51e Mon Sep 17 00:00:00 2001 From: frcroth Date: Thu, 11 Sep 2025 17:01:29 +0200 Subject: [PATCH 04/62] Use S3 transfer manager for file uploads --- app/controllers/DatasetController.scala | 9 +++- project/Dependencies.scala | 5 ++- .../services/DSRemoteWebknossosClient.scala | 6 --- .../services/uploading/UploadService.scala | 41 ++++++------------- 4 files changed, 23 insertions(+), 38 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index 393470c9c42..c697cabceae 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, TristateOptionJsonHelper} import com.scalableminds.webknossos.datastore.models.AdditionalCoordinate -import com.scalableminds.webknossos.datastore.models.datasource.ElementClass +import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, ElementClass} import mail.{MailchimpClient, MailchimpTag} import models.analytics.{AnalyticsService, ChangeDatasetSettingsEvent, OpenDatasetEvent} import models.dataset._ @@ -21,7 +21,6 @@ import models.organization.OrganizationDAO import models.team.{TeamDAO, TeamService} import models.user.{User, UserDAO, UserService} import com.scalableminds.util.tools.{Empty, Failure, Full} -import com.scalableminds.webknossos.datastore.services.DataSourceRegistrationInfo import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.functional.syntax._ import play.api.libs.json._ @@ -72,6 +71,12 @@ object SegmentAnythingMaskParameters { implicit val jsonFormat: Format[SegmentAnythingMaskParameters] = Json.format[SegmentAnythingMaskParameters] } +case class DataSourceRegistrationInfo(dataSource: DataSource, folderId: Option[String], dataStoreName: String) + +object DataSourceRegistrationInfo { + implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo] +} + class DatasetController @Inject()(userService: UserService, userDAO: UserDAO, datasetService: DatasetService, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 67d70bd37e7..51dbd977c27 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -5,6 +5,7 @@ object 
Dependencies { private val silhouetteVersion = "10.0.3" private val brotliVersion = "1.19.0" private val slickVersion = "3.5.2" + private val awsVersion = "2.32.24" private val scalapbVersion = scalapb.compiler.Version.scalapbVersion private val grpcVersion = scalapb.compiler.Version.grpcJavaVersion @@ -55,7 +56,9 @@ object Dependencies { // MultiArray (ndarray) handles. import ucar "edu.ucar" % "cdm-core" % "5.4.2", // Amazon S3 cloud storage client. import software.amazon.awssdk - "software.amazon.awssdk" % "s3" % "2.32.24", + "software.amazon.awssdk" % "s3" % awsVersion, + // AWS Transfer Manager for multipart uploads. import software.amazon.awssdk.transfer.s3 + "software.amazon.awssdk" % "s3-transfer-manager" % awsVersion, // Google cloud storage client. import com.google.cloud.storage, import com.google.auth.oauth2 "com.google.cloud" % "google-cloud-storage" % "2.55.0", // Blosc compression. import dev.zarr.bloscjava diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala index f3214cbd080..cddd7687393 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala @@ -50,12 +50,6 @@ object MagPathInfo { implicit val jsonFormat: OFormat[MagPathInfo] = Json.format[MagPathInfo] } -case class DataSourceRegistrationInfo(dataSource: DataSource, folderId: Option[String], dataStoreName: String) - -object DataSourceRegistrationInfo { - implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo] -} - trait RemoteWebknossosClient { def requestUserAccess(accessRequest: UserAccessRequest)(implicit tc: TokenContext): Fox[UserAccessAnswer] } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 320484ecacd..f27110e75a6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -31,7 +31,6 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils import play.api.libs.json.{Json, OFormat, Reads} import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} -import software.amazon.awssdk.core.async.AsyncRequestBody import software.amazon.awssdk.core.checksums.RequestChecksumCalculation import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.S3AsyncClient @@ -40,9 +39,10 @@ import software.amazon.awssdk.services.s3.model.{ CompletedMultipartUpload, CompletedPart, CreateMultipartUploadRequest, - PutObjectRequest, UploadPartRequest } +import software.amazon.awssdk.transfer.s3.S3TransferManager +import software.amazon.awssdk.transfer.s3.model.UploadDirectoryRequest import java.io.{File, RandomAccessFile} import java.net.URI @@ -584,7 +584,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, dataSource, newBasePath = s"s3://$endPointHost/${dataStoreConfig.Datastore.S3Upload.bucketName}/$s3ObjectKey") _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId, allowNewPaths = true) - 
// TODO: Is uploaded dataset size the same as local dataset size? datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox _ = this.synchronized { PathUtils.deleteDirectoryRecursively(unpackToDir) @@ -671,38 +670,22 @@ class UploadService @Inject()(dataSourceService: DataSourceService, exploreLocalLayerService.writeLocalDatasourceProperties(dataSource, path)) } yield path + private lazy val transferManager = S3TransferManager.builder().s3Client(s3Client).build() + private def uploadDirectoryToS3( dataDir: Path, bucketName: String, prefix: String ): Fox[Unit] = for { - files <- PathUtils.listFilesRecursive(dataDir, silent = false, maxDepth = 20).toFox - uploadFoxes = files.map(filePath => { - val relPath = dataDir.relativize(filePath).toString.replace("\\", "/") - val s3Key = s"$prefix$relPath" - - // TODO: For large files, consider using multipart upload - logger.info("Uploading file to S3: " + filePath) - val bytes = Files.readAllBytes(filePath) - logger.info(s"Dataset Upload: Uploading ${bytes.length} bytes to s3://$bucketName/$s3Key") - val startTime = System.currentTimeMillis() - logger.info(s"Starting upload of $filePath to S3 at $startTime") - - for { - _ <- Fox.fromFuture { - s3Client - .putObject( - PutObjectRequest.builder().bucket(bucketName).key(s3Key).build(), - AsyncRequestBody.fromBytes(bytes) - ) - .asScala - } ?~> s"Failed to upload file $filePath to S3" - } yield () - }) - // TODO: Limit number of concurrent uploads? - _ <- Fox.combined(uploadFoxes) - _ = logger.info(s"Finished uploading directory to S3 at ${System.currentTimeMillis()}") + _ <- Fox.successful(()) + directoryUpload = transferManager.uploadDirectory( + UploadDirectoryRequest.builder().bucket(bucketName).s3Prefix(prefix).source(dataDir).build() + ) + completedUpload <- Fox.fromFuture(directoryUpload.completionFuture().asScala) + failedTransfers = completedUpload.failedTransfers() + _ <- Fox.fromBool(failedTransfers.isEmpty) ?~> + s"Some files failed to upload to S3: $failedTransfers" } yield () private def cleanUpOnFailure[T](result: Box[T], From 5d9eff409141c52d104a751ca66b245985117924 Mon Sep 17 00:00:00 2001 From: frcroth Date: Thu, 11 Sep 2025 17:14:49 +0200 Subject: [PATCH 05/62] Add application.conf --- conf/application.conf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conf/application.conf b/conf/application.conf index 9debdad22f1..41021dfb82d 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -241,6 +241,14 @@ datastore { # The credentials are selected by uri prefix, so different s3 uri styles may need duplicated credential entries. credentials = [] } + s3Upload { + enabled = false + # Use the name (prefix) of a credential in the dataVaults section here to use it for uploads. 
+    credentialName = "my-credential"
+    endpoint = "https://custom-s3-endpoint.example.local"
+    bucketName = "your-bucket-name"
+    objectKeyPrefix = "webknossos-uploads"
+  }
 }
 
 # Redirect some routes to prefix + route (only if features.isWkorgInstance, route "/" only if logged out)

From 5e548c89c295e1818673662ff5f2b42d8c178c36 Mon Sep 17 00:00:00 2001
From: frcroth
Date: Thu, 11 Sep 2025 18:10:30 +0200
Subject: [PATCH 06/62] Clean up unused code, remove todos

---
 .../services/DataSourceService.scala        |   6 +-
 .../services/uploading/UploadService.scala  | 206 +++---------------
 2 files changed, 28 insertions(+), 184 deletions(-)

diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
index c4b6c958209..a1513c8c038 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
@@ -365,13 +365,11 @@ class DataSourceService @Inject()(
     }
   }
 
-  // Replace relative paths with absolute paths
-  // TODO: Rename method
-  def replacePaths(dataSource: InboxDataSource, newBasePath: String): Fox[DataSource] = {
+  // Prepend newBasePath to all (relative) paths in mags and attachments of the data source.
+  def prependAllPaths(dataSource: InboxDataSource, newBasePath: String): Fox[DataSource] = {
     val replaceUri = (uri: URI) => {
       val isRelativeFilePath = (uri.getScheme == null || uri.getScheme.isEmpty || uri.getScheme == DataVaultService.schemeFile) && !uri.isAbsolute
       uri.getPath match {
-        // TODO: Does this make sense?
         case pathStr if isRelativeFilePath =>
           new URI(uri.getScheme,
                   uri.getUserInfo,
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala
index f27110e75a6..f31951e3e86 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala
@@ -34,23 +34,13 @@ import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCrede
 import software.amazon.awssdk.core.checksums.RequestChecksumCalculation
 import software.amazon.awssdk.regions.Region
 import software.amazon.awssdk.services.s3.S3AsyncClient
-import software.amazon.awssdk.services.s3.model.{
-  CompleteMultipartUploadRequest,
-  CompletedMultipartUpload,
-  CompletedPart,
-  CreateMultipartUploadRequest,
-  UploadPartRequest
-}
 import software.amazon.awssdk.transfer.s3.S3TransferManager
 import software.amazon.awssdk.transfer.s3.model.UploadDirectoryRequest
 
 import java.io.{File, RandomAccessFile}
 import java.net.URI
-import java.util
 import java.nio.file.{Files, Path}
-import java.util.stream.{Collectors, StreamSupport}
 import scala.concurrent.{ExecutionContext, Future}
-import scala.jdk.CollectionConverters.IterableHasAsJava
 import scala.jdk.FutureConverters._
 
 case class ReserveUploadInformation(
@@ -175,10 +165,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService,
     s"upload___${uploadId}___datasetId"
   private def redisKeyForFilePaths(uploadId: String): String =
     s"upload___${uploadId}___filePaths"
-  private def redisKeyForS3MultipartUploadId(uploadId: String, fileName: String): String =
s"upload___${uploadId}___file___${fileName}___s3MultipartUploadId" - private def redisKeyForS3PartETag(uploadId: String, fileName: String, partNumber: Long): String = - s"upload___${uploadId}___file___${fileName}___partETag___$partNumber" cleanUpOrphanUploads() @@ -349,169 +335,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else Fox.successful(()) } - private lazy val s3UploadCredentialsOpt: Option[(String, String)] = - dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => - new CredentialConfigReader(credentialConfig).getCredential - }.collectFirst { - case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) - if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => - (accessKeyId, secretAccessKey) - } - private lazy val s3Client: S3AsyncClient = S3AsyncClient - .builder() - .credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.builder - .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) - .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) - .build() - )) - .crossRegionAccessEnabled(true) - .forcePathStyle(true) - .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) - .region(Region.US_EAST_1) - // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". - .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) - .build() - - def handleUploadChunkAws( - uploadFileId: String, - chunkSize: Long, - currentChunkSize: Long, - totalChunkCount: Long, - currentChunkNumber: Long, - chunkFile: File, - bucketName: String, - objectKey: String - ): Fox[Unit] = { - val uploadId = extractDatasetUploadId(uploadFileId) - - def getAllPartETags(uploadId: String, filePath: String, totalChunkCount: Long): Fox[Vector[(Int, String)]] = - for { - possibleEtags <- Fox.combined( - (1L to totalChunkCount).map(i => - runningUploadMetadataStore.find(redisKeyForS3PartETag(uploadId, filePath, i))) - ) - etagsWithIndex = possibleEtags.zipWithIndex - foundEtags = etagsWithIndex.collect { - case (Some(etag), idx) => (idx + 1, etag) // partNumber starts at 1 - } - } yield foundEtags.toVector - - for { - dataSourceId <- getDataSourceIdByUploadId(uploadId) - (filePath, uploadDir) <- getFilePathAndDirOfUploadId(uploadFileId) - - isFileKnown <- runningUploadMetadataStore.contains(redisKeyForFileChunkCount(uploadId, filePath)) - totalFileSizeInBytesOpt <- runningUploadMetadataStore.findLong(redisKeyForTotalFileSizeInBytes(uploadId)) - - _ <- Fox.runOptional(totalFileSizeInBytesOpt) { maxFileSize => - runningUploadMetadataStore - .increaseBy(redisKeyForCurrentUploadedTotalFileSizeInBytes(uploadId), currentChunkSize) - .flatMap(newTotalFileSizeInBytesOpt => { - if (newTotalFileSizeInBytesOpt.getOrElse(0L) > maxFileSize) { - cleanUpDatasetExceedingSize(uploadDir, uploadId).flatMap(_ => - Fox.failure("dataset.upload.moreBytesThanReserved")) - } else Fox.successful(()) - }) - } - - // Initialize multipart upload on first chunk - _ <- Fox.runIf(!isFileKnown) { - for { - _ <- runningUploadMetadataStore.insertIntoSet(redisKeyForFileNameSet(uploadId), filePath) - _ <- runningUploadMetadataStore.insert( - redisKeyForFileChunkCount(uploadId, filePath), - String.valueOf(totalChunkCount) - ) - // Start multipart upload - createResp <- Fox.fromFuture { - s3Client - .createMultipartUpload( - CreateMultipartUploadRequest.builder().bucket(bucketName).key(objectKey).build() - ) - .asScala - } - _ <- runningUploadMetadataStore.insert( - 
redisKeyForS3MultipartUploadId(uploadId, filePath), - createResp.uploadId() - ) - } yield () - } - - isNewChunk <- runningUploadMetadataStore.insertIntoSet( - redisKeyForFileChunkSet(uploadId, filePath), - String.valueOf(currentChunkNumber) - ) - - } yield { - if (isNewChunk) { - try { - val bytes = Files.readAllBytes(chunkFile.toPath) - for { - s3UploadIdOpt <- runningUploadMetadataStore.find(redisKeyForS3MultipartUploadId(uploadId, filePath)) - s3UploadId <- s3UploadIdOpt.toFox ?~> s"No multipart uploadId found for $filePath" - - // Upload part to S3 - uploadResp <- Fox.fromFuture { - s3Client - .uploadPart( - UploadPartRequest - .builder() - .bucket(bucketName) - .key(objectKey) - .uploadId(s3UploadId) - .partNumber(currentChunkNumber.toInt) - .contentLength(currentChunkSize) - .build(), - software.amazon.awssdk.core.async.AsyncRequestBody.fromBytes(bytes) - ) - .asScala - } - - // Store ETag for later completion - _ <- runningUploadMetadataStore.insert( - redisKeyForS3PartETag(uploadId, filePath, currentChunkNumber), - uploadResp.eTag() - ) - - // Complete multipart upload if all chunks uploaded - _ <- Fox.runIf(currentChunkNumber == totalChunkCount) { - for { - eTags <- getAllPartETags(uploadId, filePath, totalChunkCount) - completedParts: util.List[CompletedPart] = StreamSupport - .stream(eTags.map { - case (partNum, etag) => - CompletedPart.builder().partNumber(partNum).eTag(etag).build() - }.asJava.spliterator(), false) - .collect(Collectors.toList()) - completeReq = CompleteMultipartUploadRequest - .builder() - .bucket(bucketName) - .key(objectKey) - .uploadId(s3UploadId) - .multipartUpload( - CompletedMultipartUpload.builder().parts(completedParts).build() - ) - .build() - _ <- Fox.fromFuture(s3Client.completeMultipartUpload(completeReq).asScala) - } yield () - } - } yield Fox.successful(()) - - } catch { - case e: Exception => - runningUploadMetadataStore.removeFromSet(redisKeyForFileChunkSet(uploadId, filePath), - String.valueOf(currentChunkNumber)) - val errorMsg = - s"Error receiving chunk $currentChunkNumber for upload ${dataSourceId.directoryName}: ${e.getMessage}" - logger.warn(errorMsg) - Fox.failure(errorMsg) - } - } else Fox.successful(()) - } - } - def cancelUpload(cancelUploadInformation: CancelUploadInformation): Fox[Unit] = { val uploadId = cancelUploadInformation.uploadId for { @@ -567,7 +390,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, unpackToDir, dataSourceId, linkedLayerInfo.layersToLink).shiftBox - // Post-processing needs to be handled differently for s3 uploads? 
_ <- cleanUpOnFailure(postProcessingResult, dataSourceId, datasetNeedsConversion, @@ -580,7 +402,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" _ <- uploadDirectoryToS3(unpackToDir, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost - s3DataSource <- dataSourceService.replacePaths( + s3DataSource <- dataSourceService.prependAllPaths( dataSource, newBasePath = s"s3://$endPointHost/${dataStoreConfig.Datastore.S3Upload.bucketName}/$s3ObjectKey") _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId, allowNewPaths = true) @@ -670,6 +492,31 @@ class UploadService @Inject()(dataSourceService: DataSourceService, exploreLocalLayerService.writeLocalDatasourceProperties(dataSource, path)) } yield path + private lazy val s3UploadCredentialsOpt: Option[(String, String)] = + dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + }.collectFirst { + case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) + if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => + (accessKeyId, secretAccessKey) + } + private lazy val s3Client: S3AsyncClient = S3AsyncClient + .builder() + .credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.builder + .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) + .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) + .build() + )) + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) + .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". + .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) + .build() + private lazy val transferManager = S3TransferManager.builder().s3Client(s3Client).build() private def uploadDirectoryToS3( @@ -967,7 +814,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, dataSourceId <- getDataSourceIdByUploadId(uploadId) _ <- runningUploadMetadataStore.remove(redisKeyForDataSourceId(uploadId)) _ <- runningUploadMetadataStore.remove(redisKeyForDatasetId(uploadId)) - // TODO: Remove S3 multipart upload if present _ <- runningUploadMetadataStore.remove(redisKeyForLinkedLayerIdentifier(uploadId)) _ <- runningUploadMetadataStore.remove(redisKeyForUploadId(dataSourceId)) _ <- runningUploadMetadataStore.remove(redisKeyForFilePaths(uploadId)) From 71b3e304092065c7f5cab167db20e3ba3c608bda Mon Sep 17 00:00:00 2001 From: frcroth Date: Thu, 11 Sep 2025 18:22:12 +0200 Subject: [PATCH 07/62] Update changelog --- unreleased_changes/8912.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 unreleased_changes/8912.md diff --git a/unreleased_changes/8912.md b/unreleased_changes/8912.md new file mode 100644 index 00000000000..1a198793575 --- /dev/null +++ b/unreleased_changes/8912.md @@ -0,0 +1,17 @@ +### Added +- Datasets can be uploaded to S3-compatible object storage services. This is disabled by default. + +### Migration +- New keys have been added to the application.conf of the data store. + +Add these lines in the "datastore" section: +``` +s3Upload { + enabled = true + # Use the prefix / name of a credential in the dataVaults section here to use it for uploads. 
+ credentialName = "s3://fsn1.your-objectstorage.com/webknossos-test/upload-tests/" + endpoint = "https://fsn1.your-objectstorage.com" + bucketName = "webknossos-test" + objectKeyPrefix = "upload-tests" + } +``` From 5c6406332ffec057f0668b398754ef1038a95ca7 Mon Sep 17 00:00:00 2001 From: frcroth Date: Fri, 12 Sep 2025 11:48:22 +0200 Subject: [PATCH 08/62] Add absolute paths to datasets with no mag paths --- .../webknossos/datastore/services/DataSourceService.scala | 8 +++++++- .../datastore/services/uploading/UploadService.scala | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index a1513c8c038..f3ea99e542b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -392,7 +392,13 @@ class DataSourceService @Inject()( mag => mag.path match { case Some(pathStr) => mag.copy(path = Some(replaceUri(new URI(pathStr)).toString)) - case _ => mag + // If the mag does not have a path, it is an implicit path, we need to make it explicit. + case _ => + mag.copy( + path = Some( + new URI(newBasePath) + .resolve(List(layerWithMagLocators.name, mag.mag.toMagLiteral(true)).mkString("/")) + .toString)) }, attachmentMapping = attachment => DatasetLayerAttachments( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index f31951e3e86..dfcab655ce0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -398,9 +398,12 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetSizeBytes: Long <- if (uploadToS3) { for { _ <- Fox.successful(()) - + _ = logger.info( + s"Starting upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" _ <- uploadDirectoryToS3(unpackToDir, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) + _ = logger.info( + s"Finished upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost s3DataSource <- dataSourceService.prependAllPaths( dataSource, From 631bc4a6d68f165d420290f9f5c2ea47979e63f6 Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 15 Sep 2025 17:12:05 +0200 Subject: [PATCH 09/62] Do not upload unreferenced files to S3 --- .../services/uploading/UploadService.scala | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index dfcab655ce0..0ea1e916e1e 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -20,6 +20,7 @@ import 
com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, Directory import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.GenericDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON import com.scalableminds.webknossos.datastore.models.datasource._ +import com.scalableminds.webknossos.datastore.models.datasource.inbox.InboxDataSource import com.scalableminds.webknossos.datastore.services.{DSRemoteWebknossosClient, DataSourceService} import com.scalableminds.webknossos.datastore.storage.{ CredentialConfigReader, @@ -401,7 +402,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ = logger.info( s"Starting upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" - _ <- uploadDirectoryToS3(unpackToDir, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) + _ <- uploadDirectoryToS3(unpackToDir, dataSource, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) _ = logger.info( s"Finished upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost @@ -524,11 +525,23 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def uploadDirectoryToS3( dataDir: Path, + dataSource: InboxDataSource, bucketName: String, prefix: String ): Fox[Unit] = for { _ <- Fox.successful(()) + // Delete all files in the dataDir that are not at a mag path or an attachment path, since we do not need to upload them to S3. + filesToDelete <- getNonReferencedFiles(dataDir, dataSource) + _ = filesToDelete.foreach(file => { + logger.info(s"Deleting file $file before upload to S3.") + try { + Files.deleteIfExists(file) + } catch { + case e: Exception => + logger.warn(s"Could not delete file $file before upload to S3: ${e.getMessage}") + } + }) directoryUpload = transferManager.uploadDirectory( UploadDirectoryRequest.builder().bucket(bucketName).s3Prefix(prefix).source(dataDir).build() ) @@ -538,6 +551,26 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"Some files failed to upload to S3: $failedTransfers" } yield () + private def getNonReferencedFiles(dataDir: Path, dataSource: InboxDataSource): Fox[List[Path]] = + for { + usableDataSource <- dataSource.toUsable.toFox ?~> "Data source is not usable" + explicitPaths: Set[Path] = usableDataSource.dataLayers + .flatMap(layer => + layer.mags.map(mag => + mag.path match { + case Some(_) => None + case None => Some(dataDir.resolve(List(layer.name, mag.mag.toMagLiteral(true)).mkString("/"))) + })) + .flatten + .toSet + neededPaths = usableDataSource.dataLayers + .flatMap(layer => layer.allExplicitPaths) + .map(dataDir.resolve) + .toSet ++ explicitPaths + allFiles <- PathUtils.listFilesRecursive(dataDir, silent = true, maxDepth = 10).toFox + filesToDelete = allFiles.filterNot(file => neededPaths.exists(neededPath => file.startsWith(neededPath))) + } yield filesToDelete + private def cleanUpOnFailure[T](result: Box[T], dataSourceId: DataSourceId, datasetNeedsConversion: Boolean, From acc56ba780c4204948498c959c75c230f4406acc Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 17 Sep 2025 11:25:01 +0200 Subject: [PATCH 10/62] Do not upload on conversion, simplify application.conf --- conf/application.conf | 6 +- unreleased_changes/8912.md | 2 +- .../datastore/DataStoreConfig.scala | 2 - 
.../datastore/datavault/S3DataVault.scala | 2 +- .../services/uploading/UploadService.scala | 86 ++++++++++++------- 5 files changed, 58 insertions(+), 40 deletions(-) diff --git a/conf/application.conf b/conf/application.conf index 41021dfb82d..c4248f507e7 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -243,10 +243,8 @@ datastore { } s3Upload { enabled = false - # Use the name (prefix) of a credential in the dataVaults section here to use it for uploads. - credentialName = "my-credential" - endpoint = "https://custom-s3-endpoint.example.local" - bucketName = "your-bucket-name" + # Use the name of a credential in the dataVaults section here to use it for uploads. + credentialName = "s3://example/uri/prefix" objectKeyPrefix = "webknossos-uploads" } } diff --git a/unreleased_changes/8912.md b/unreleased_changes/8912.md index 1a198793575..69dc6ffe09b 100644 --- a/unreleased_changes/8912.md +++ b/unreleased_changes/8912.md @@ -4,7 +4,7 @@ ### Migration - New keys have been added to the application.conf of the data store. -Add these lines in the "datastore" section: +Add these lines in the "datastore" section to enable S3 upload: ``` s3Upload { enabled = true diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala index fa704074507..e5c5358a327 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreConfig.scala @@ -62,8 +62,6 @@ class DataStoreConfig @Inject()(configuration: Configuration) extends ConfigRead } object S3Upload { val enabled: Boolean = get[Boolean]("datastore.s3Upload.enabled") - val endpoint: String = get[String]("datastore.s3Upload.endpoint") - val bucketName: String = get[String]("datastore.s3Upload.bucketName") val objectKeyPrefix: String = get[String]("datastore.s3Upload.objectKeyPrefix") val credentialName: String = get[String]("datastore.s3Upload.credentialName") } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala index 11393142b8b..d488d5b6d07 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala @@ -159,7 +159,7 @@ object S3DataVault { new S3DataVault(credential, remoteSourceDescriptor.uri, ws, ec) } - private def hostBucketFromUri(uri: URI): Option[String] = { + def hostBucketFromUri(uri: URI): Option[String] = { val host = uri.getHost if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key Some(host) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 0ea1e916e1e..40f991e5517 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -5,6 +5,7 @@ import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.io.PathUtils.ensureDirectoryBox import com.scalableminds.util.io.{PathUtils, ZipIO} import 
com.scalableminds.util.objectid.ObjectId +import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools._ import com.scalableminds.webknossos.datastore.DataStoreConfig @@ -15,6 +16,7 @@ import com.scalableminds.webknossos.datastore.datareaders.n5.{N5Header, N5Metada import com.scalableminds.webknossos.datastore.datareaders.precomputed.PrecomputedHeader.FILENAME_INFO import com.scalableminds.webknossos.datastore.datareaders.zarr.NgffMetadata.FILENAME_DOT_ZATTRS import com.scalableminds.webknossos.datastore.datareaders.zarr.ZarrHeader.FILENAME_DOT_ZARRAY +import com.scalableminds.webknossos.datastore.datavault.S3DataVault import com.scalableminds.webknossos.datastore.explore.ExploreLocalLayerService import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, DirectoryConstants} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload @@ -396,19 +398,20 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetNeedsConversion, label = s"processing dataset at $unpackToDir") dataSource = dataSourceService.dataSourceFromDir(unpackToDir, dataSourceId.organizationId) - datasetSizeBytes: Long <- if (uploadToS3) { + datasetSizeBytes: Long <- if (uploadToS3 && !datasetNeedsConversion) { for { _ <- Fox.successful(()) - _ = logger.info( - s"Starting upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") + beforeS3Upload = Instant.now + s3UploadBucket <- s3UploadBucketOpt.toFox s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" - _ <- uploadDirectoryToS3(unpackToDir, dataSource, dataStoreConfig.Datastore.S3Upload.bucketName, s3ObjectKey) - _ = logger.info( - s"Finished upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3.") - endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.endpoint).getHost - s3DataSource <- dataSourceService.prependAllPaths( - dataSource, - newBasePath = s"s3://$endPointHost/${dataStoreConfig.Datastore.S3Upload.bucketName}/$s3ObjectKey") + _ <- uploadDirectoryToS3(unpackToDir, dataSource, s3UploadBucket, s3ObjectKey) + _ = Instant.logSince(beforeS3Upload, + s"Upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3", + logger) + endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost + s3DataSource <- dataSourceService.prependAllPaths(dataSource, + newBasePath = + s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey") _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId, allowNewPaths = true) datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox _ = this.synchronized { @@ -504,24 +507,41 @@ class UploadService @Inject()(dataSourceService: DataSourceService, if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => (accessKeyId, secretAccessKey) } - private lazy val s3Client: S3AsyncClient = S3AsyncClient - .builder() - .credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.builder - .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) - .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) - .build() - )) - .crossRegionAccessEnabled(true) - .forcePathStyle(true) - .endpointOverride(new URI(dataStoreConfig.Datastore.S3Upload.endpoint)) - .region(Region.US_EAST_1) - // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". 
- .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) - .build() - - private lazy val transferManager = S3TransferManager.builder().s3Client(s3Client).build() + private lazy val s3UploadBucketOpt: Option[String] = + S3DataVault.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName)) + private lazy val s3UploadEndpoint: URI = { + val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName) + new URI( + "https", + null, + credentialUri.getHost, + -1, + null, + null, + null + ) + } + private lazy val s3ClientBox: Box[S3AsyncClient] = for { + accessKeyId <- Box(s3UploadCredentialsOpt.map(_._1)) + secretAccessKey <- Box(s3UploadCredentialsOpt.map(_._2)) + } yield + S3AsyncClient + .builder() + .credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.builder.accessKeyId(accessKeyId).secretAccessKey(secretAccessKey).build() + )) + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(s3UploadEndpoint) + .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". + .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) + .build() + + private lazy val transferManagerBox: Box[S3TransferManager] = for { + client <- s3ClientBox + } yield S3TransferManager.builder().s3Client(client).build() private def uploadDirectoryToS3( dataDir: Path, @@ -542,6 +562,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, logger.warn(s"Could not delete file $file before upload to S3: ${e.getMessage}") } }) + transferManager <- transferManagerBox.toFox ?~> "S3 upload is not properly configured, cannot get S3 client" directoryUpload = transferManager.uploadDirectory( UploadDirectoryRequest.builder().bucket(bucketName).s3Prefix(prefix).source(dataDir).build() ) @@ -620,15 +641,16 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetNeedsConversion: Boolean, uploadToS3: Boolean): Path = { val dataSourceDir = { - if (uploadToS3) - s3UploadDirectory(dataSourceId.organizationId, dataSourceId.directoryName) + if (datasetNeedsConversion) + dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) else { - if (datasetNeedsConversion) - dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) + if (uploadToS3) + s3UploadDirectory(dataSourceId.organizationId, dataSourceId.directoryName) else dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) } } + dataSourceDir } From fc222456aad4a1b7944c2ac2b9dd027353c78a9d Mon Sep 17 00:00:00 2001 From: frcroth Date: Mon, 15 Sep 2025 15:03:31 +0200 Subject: [PATCH 11/62] Implement deletion of S3 datasets --- app/models/dataset/DatasetService.scala | 20 +-- .../controllers/DataSourceController.scala | 6 +- .../datastore/datavault/S3DataVault.scala | 3 +- .../services/DataSourceService.scala | 132 +++++++++++++++++- 4 files changed, 147 insertions(+), 14 deletions(-) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index ff55b49562a..d7735d831f4 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -671,16 +671,16 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, def deleteVirtualOrDiskDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = for { - _ <- if (dataset.isVirtual) { - // At this point, we should 
also free space in S3 once implemented. - // Right now, we can just mark the dataset as deleted in the database. - datasetDAO.deleteDataset(dataset._id, onlyMarkAsDeleted = true) - } else { - for { - datastoreClient <- clientFor(dataset) - _ <- datastoreClient.deleteOnDisk(dataset._id) - } yield () - } ?~> "dataset.delete.failed" + //_ <- if (dataset.isVirtual) { + // At this point, we should also free space in S3 once implemented. + // Right now, we can just mark the dataset as deleted in the database. + // datasetDAO.deleteDataset(dataset._id, onlyMarkAsDeleted = true) + //} else { + //for { + datastoreClient <- clientFor(dataset) + _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" + // } yield () + //} ?~> "dataset.delete.failed" } yield () def publicWrites(dataset: Dataset, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 5aeb70c1bef..b541f5c8b3d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -448,20 +448,22 @@ class DataSourceController @Inject()( for { dataSource <- datasetCache.getById(datasetId) ~> NOT_FOUND dataSourceId = dataSource.id - _ <- if (dataSourceService.existsOnDisk(dataSourceId.organizationId, dataSourceId.directoryName)) { + existsOnDisk = dataSourceService.existsOnDisk(dataSourceId.organizationId, dataSourceId.directoryName) + _ <- if (existsOnDisk) { for { _ <- dataSourceService.deleteOnDisk( dataSourceId.organizationId, dataSourceId.directoryName, Some(datasetId), reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" - _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) } yield () } else for { + _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))(dataSourceService.deleteFromControlledS3(dataSource)) _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) _ = logger.warn(s"Tried to delete dataset ${dataSource.id} that is not on disk.") } yield () + _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) } yield Ok } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala index d488d5b6d07..ae88eae708b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala @@ -159,6 +159,7 @@ object S3DataVault { new S3DataVault(credential, remoteSourceDescriptor.uri, ws, ec) } + // TODO: Move non private methods to trait? 
def hostBucketFromUri(uri: URI): Option[String] = { val host = uri.getHost if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key @@ -185,7 +186,7 @@ object S3DataVault { private def isShortStyle(uri: URI): Boolean = !uri.getHost.contains(".") - private def objectKeyFromUri(uri: URI): Box[String] = + def objectKeyFromUri(uri: URI): Box[String] = if (isVirtualHostedStyle(uri)) { Full(uri.getPath) } else if (isPathStyle(uri)) { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index f3ea99e542b..dc0045a5856 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -13,12 +13,29 @@ import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingPr import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler} import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSource, UnusableDataSource} -import com.scalableminds.webknossos.datastore.storage.{DataVaultService, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.{ + CredentialConfigReader, + DataVaultService, + RemoteSourceDescriptorService, + S3AccessKeyCredential +} import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools._ +import com.scalableminds.webknossos.datastore.datavault.S3DataVault import play.api.inject.ApplicationLifecycle import play.api.libs.json.Json +import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} +import software.amazon.awssdk.core.checksums.RequestChecksumCalculation +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3AsyncClient +import software.amazon.awssdk.services.s3.model.{ + Delete, + DeleteObjectsRequest, + DeleteObjectsResponse, + ListObjectsV2Request, + ObjectIdentifier +} import java.io.{File, FileWriter} import java.net.URI @@ -26,6 +43,8 @@ import java.nio.file.{Files, Path} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ import scala.io.Source +import scala.jdk.CollectionConverters._ +import scala.jdk.FutureConverters._ class DataSourceService @Inject()( config: DataStoreConfig, @@ -446,4 +465,115 @@ class DataSourceService @Inject()( remoteSourceDescriptorService.removeVaultFromCache(attachment))) } yield dataLayer.mags.length } yield removedEntriesList.sum + + private lazy val globalCredentials = { + val res = config.Datastore.DataVaults.credentials.flatMap { credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + } + logger.info(s"Parsed ${res.length} global data vault credentials from datastore config.") + res + } + + def datasetInControlledS3(dataSource: DataSource) = { + def commonPrefix(strings: Seq[String]): String = { + if (strings.isEmpty) return "" + + strings.reduce { (a, b) => + a.zip(b).takeWhile { case (c1, c2) => c1 == c2 }.map(_._1).mkString + } + } + + val allPaths = dataSource.allExplicitPaths + val sharedPath = commonPrefix(allPaths) + val matchingCredentials = globalCredentials.filter(c => sharedPath.startsWith(c.name)) + matchingCredentials.nonEmpty && 
sharedPath.startsWith("s3") + } + + private lazy val s3UploadCredentialsOpt: Option[(String, String)] = + config.Datastore.DataVaults.credentials.flatMap { credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + }.collectFirst { + case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) + if config.Datastore.S3Upload.credentialName == credentialName => + (accessKeyId, secretAccessKey) + } + private lazy val s3Client: S3AsyncClient = S3AsyncClient + .builder() + .credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.builder + .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) + .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) + .build() + )) + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(new URI(config.Datastore.S3Upload.endpoint)) + .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". + .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) + .build() + + def deleteFromControlledS3(dataSource: DataSource): Fox[Unit] = { + // TODO: Do we handle other datasets using the same layers? + + def deleteBatch(bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = + if (keys.isEmpty) Fox.empty + else { + Fox.fromFuture( + s3Client + .deleteObjects( + DeleteObjectsRequest + .builder() + .bucket(bucket) + .delete( + Delete + .builder() + .objects( + keys.map(k => ObjectIdentifier.builder().key(k).build()).asJava + ) + .build() + ) + .build() + ) + .asScala) + } + + def listKeysAtPrefix(bucket: String, prefix: String): Fox[Seq[String]] = { + def listRec(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { + val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) + val request = continuationToken match { + case Some(token) => builder.continuationToken(token).build() + case None => builder.build() + } + for { + response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala) + keys = response.contents().asScala.map(_.key()) + allKeys = acc ++ keys + result <- if (response.isTruncated) { + listRec(Option(response.nextContinuationToken()), allKeys) + } else { + Fox.successful(allKeys) + } + } yield result + } + listRec(None, Seq()) + } + + for { + _ <- Fox.successful(()) + paths = dataSource.allExplicitPaths + // Assume everything is in the same bucket + firstPath <- paths.headOption.toFox ?~> "No explicit paths found for dataset in controlled S3" + bucket <- S3DataVault + .hostBucketFromUri(new URI(firstPath)) + .toFox ?~> s"Could not determine S3 bucket from path $firstPath" + prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(new URI(path)).toFox)) + keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(bucket, _)).map(_.flatten) + uniqueKeys = keys.distinct + _ = logger.info( + s"Deleting ${uniqueKeys.length} objects from controlled S3 bucket $bucket for dataset ${dataSource.id}") + _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(bucket, _)).map(_ => ()) + } yield () + } } From 063132d14674e1736e21441bb8536283921e289e Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 17 Sep 2025 09:52:00 +0200 Subject: [PATCH 12/62] Do not delete mags that are referenced in other layers --- app/models/dataset/Dataset.scala | 4 +- .../controllers/DataSourceController.scala | 2 +- .../services/DataSourceService.scala | 42 +++++++++++-------- 3 files changed, 28 insertions(+), 20 
deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 2b995701c2c..d9645b6a085 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -772,8 +772,8 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte layer.magsOpt match { case Some(mags) => mags.map(mag => { - q"""INSERT INTO webknossos.dataset_mags(_dataset, dataLayerName, mag, path, axisOrder, channelIndex, credentialId) - VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.axisOrder + q"""INSERT INTO webknossos.dataset_mags(_dataset, dataLayerName, mag, path, realPath, axisOrder, channelIndex, credentialId) + VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.path}, ${mag.axisOrder .map(Json.toJson(_))}, ${mag.channelIndex}, ${mag.credentialId}) """.asUpdate }) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index b541f5c8b3d..fb6e2b23ec0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -459,7 +459,7 @@ class DataSourceController @Inject()( } yield () } else for { - _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))(dataSourceService.deleteFromControlledS3(dataSource)) + _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))(dataSourceService.deleteFromControlledS3(dataSource, datasetId)) _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) _ = logger.warn(s"Tried to delete dataset ${dataSource.id} that is not on disk.") } yield () diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index dc0045a5856..8d692bb71cf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -6,11 +6,12 @@ import com.google.inject.name.Named import com.scalableminds.util.io.PathUtils import com.scalableminds.util.io.PathUtils.ensureDirectoryBox import com.scalableminds.util.mvc.Formatter +import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingProvider} -import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler} +import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, MagLinkInfo} import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.models.datasource.inbox.{InboxDataSource, UnusableDataSource} import com.scalableminds.webknossos.datastore.storage.{ @@ -474,7 +475,7 @@ class DataSourceService @Inject()( res } - def datasetInControlledS3(dataSource: DataSource) = { + def datasetInControlledS3(dataSource: DataSource): Boolean = { def commonPrefix(strings: Seq[String]): String = { if (strings.isEmpty) return "" @@ -514,9 +515,7 @@ class DataSourceService @Inject()( 
.requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) .build() - def deleteFromControlledS3(dataSource: DataSource): Fox[Unit] = { - // TODO: Do we handle other datasets using the same layers? - + def deleteFromControlledS3(dataSource: DataSource, datasetId: ObjectId): Fox[Unit] = { def deleteBatch(bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = if (keys.isEmpty) Fox.empty else { @@ -562,18 +561,27 @@ class DataSourceService @Inject()( for { _ <- Fox.successful(()) - paths = dataSource.allExplicitPaths - // Assume everything is in the same bucket - firstPath <- paths.headOption.toFox ?~> "No explicit paths found for dataset in controlled S3" - bucket <- S3DataVault - .hostBucketFromUri(new URI(firstPath)) - .toFox ?~> s"Could not determine S3 bucket from path $firstPath" - prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(new URI(path)).toFox)) - keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(bucket, _)).map(_.flatten) - uniqueKeys = keys.distinct - _ = logger.info( - s"Deleting ${uniqueKeys.length} objects from controlled S3 bucket $bucket for dataset ${dataSource.id}") - _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(bucket, _)).map(_ => ()) + layersAndLinkedMags <- remoteWebknossosClient.fetchPaths(datasetId) + magsLinkedByOtherDatasets: Set[MagLinkInfo] = layersAndLinkedMags + .flatMap(layerInfo => layerInfo.magLinkInfos.filter(_.linkedMags.nonEmpty)) + .toSet + linkedMagPaths = magsLinkedByOtherDatasets.flatMap(_.linkedMags).flatMap(_.path) + paths = dataSource.allExplicitPaths.filterNot(path => linkedMagPaths.contains(path)) + _ <- Fox.runIf(paths.nonEmpty)({ + for { + // Assume everything is in the same bucket + firstPath <- paths.headOption.toFox + bucket <- S3DataVault + .hostBucketFromUri(new URI(firstPath)) + .toFox ?~> s"Could not determine S3 bucket from path $firstPath" + prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(new URI(path)).toFox)) + keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(bucket, _)).map(_.flatten) + uniqueKeys = keys.distinct + _ = logger.info( + s"Deleting ${uniqueKeys.length} objects from controlled S3 bucket $bucket for dataset ${dataSource.id}") + _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(bucket, _)).map(_ => ()) + } yield () + }) } yield () } } From 1b6493b413b6dbe665a190a738b591c6102f72ad Mon Sep 17 00:00:00 2001 From: frcroth Date: Wed, 17 Sep 2025 10:02:30 +0200 Subject: [PATCH 13/62] Lint --- app/models/dataset/Dataset.scala | 4 ++-- .../datastore/controllers/DataSourceController.scala | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index d9645b6a085..0f3041c993b 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -773,8 +773,8 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte case Some(mags) => mags.map(mag => { q"""INSERT INTO webknossos.dataset_mags(_dataset, dataLayerName, mag, path, realPath, axisOrder, channelIndex, credentialId) - VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.path}, ${mag.axisOrder - .map(Json.toJson(_))}, ${mag.channelIndex}, ${mag.credentialId}) + VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.path}, ${mag.axisOrder.map( + Json.toJson(_))}, ${mag.channelIndex}, ${mag.credentialId}) """.asUpdate }) case None => diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index fb6e2b23ec0..774563081ca 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -459,7 +459,8 @@ class DataSourceController @Inject()( } yield () } else for { - _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))(dataSourceService.deleteFromControlledS3(dataSource, datasetId)) + _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))( + dataSourceService.deleteFromControlledS3(dataSource, datasetId)) _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) _ = logger.warn(s"Tried to delete dataset ${dataSource.id} that is not on disk.") } yield () From 505550c7bde483306280abd2f53e75d782eaae0f Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 23 Sep 2025 10:21:41 +0200 Subject: [PATCH 14/62] update migration guide --- unreleased_changes/8912.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/unreleased_changes/8912.md b/unreleased_changes/8912.md index 69dc6ffe09b..f770b2e6f34 100644 --- a/unreleased_changes/8912.md +++ b/unreleased_changes/8912.md @@ -7,11 +7,9 @@ Add these lines in the "datastore" section to enable S3 upload: ``` s3Upload { - enabled = true - # Use the prefix / name of a credential in the dataVaults section here to use it for uploads. - credentialName = "s3://fsn1.your-objectstorage.com/webknossos-test/upload-tests/" - endpoint = "https://fsn1.your-objectstorage.com" - bucketName = "webknossos-test" - objectKeyPrefix = "upload-tests" - } + enabled = false + # Use the name of a credential in the dataVaults section here to use it for uploads. 
+ credentialName = "s3://example/uri/prefix" + objectKeyPrefix = "webknossos-uploads" +} ``` From 384513d90cda062361673fe039b5ba550086deb5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 23 Sep 2025 11:37:43 +0200 Subject: [PATCH 15/62] upath --- .../controllers/LegacyController.scala | 3 +- .../services/DataSourceService.scala | 59 +++++-------------- .../services/uploading/UploadService.scala | 12 ++-- 3 files changed, 22 insertions(+), 52 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala index 98c3a8d52e6..74fc9f6a391 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala @@ -72,7 +72,8 @@ class LegacyController @Inject()( None, request.body.initialTeamIds, request.body.folderId, - Some(request.body.requireUniqueName) + Some(request.body.requireUniqueName), + None ) ) ?~> "dataset.upload.validation.failed" } yield diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index 19105ffc505..d174da7d6df 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -8,7 +8,7 @@ import com.scalableminds.util.mvc.Formatter import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingProvider} -import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, PathSchemes, UPath} +import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, UPath} import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging @@ -18,7 +18,6 @@ import play.api.inject.ApplicationLifecycle import play.api.libs.json.Json import java.io.File -import java.net.URI import java.nio.file.{Files, Path} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ @@ -251,55 +250,27 @@ class DataSourceService @Inject()( } } - // Prepend newBasePath to all (relative) paths in mags and attachments of the data source. 
- def prependAllPaths(dataSource: DataSource, newBasePath: String): Fox[DataSource] = { - val replaceUri = (uri: URI) => { - val isRelativeFilePath = (uri.getScheme == null || uri.getScheme.isEmpty || uri.getScheme == PathSchemes.schemeFile) && !uri.isAbsolute - uri.getPath match { - case pathStr if isRelativeFilePath => - new URI(uri.getScheme, - uri.getUserInfo, - uri.getHost, - uri.getPort, - newBasePath + pathStr, - uri.getQuery, - uri.getFragment) - case _ => uri - } - } - + def resolvePathsInNewBasePath(dataSource: DataSource, newBasePath: UPath): Fox[DataSource] = dataSource.toUsable match { case Some(usableDataSource) => - val updatedDataLayers = usableDataSource.dataLayers.map { - case layerWithMagLocators: StaticLayer => - layerWithMagLocators.mapped( - magMapping = mag => - mag.path match { - case Some(pathStr) => mag.copy(path = Some(replaceUri(new URI(pathStr)).toString)) - // If the mag does not have a path, it is an implicit path, we need to make it explicit. - case _ => - mag.copy( - path = Some( - new URI(newBasePath) - .resolve(List(layerWithMagLocators.name, mag.mag.toMagLiteral(true)).mkString("/")) - .toString)) - }, - attachmentMapping = attachment => - DataLayerAttachments( - attachment.meshes.map(a => a.copy(path = replaceUri(a.path))), - attachment.agglomerates.map(a => a.copy(path = replaceUri(a.path))), - attachment.segmentIndex.map(a => a.copy(path = replaceUri(a.path))), - attachment.connectomes.map(a => a.copy(path = replaceUri(a.path))), - attachment.cumsum.map(a => a.copy(path = replaceUri(a.path))) - ) - ) - case layer => layer + val updatedDataLayers = usableDataSource.dataLayers.map { layer => + layer.mapped( + magMapping = mag => + mag.path match { + case Some(existingMagPath) => mag.copy(path = Some(existingMagPath.resolvedIn(newBasePath))) + // If the mag does not have a path, it is an implicit path, we need to make it explicit. 
+ case _ => + mag.copy( + path = Some(newBasePath / layer.name / mag.mag.toMagLiteral(true)) + ) + }, + attachmentMapping = _.resolvedIn(newBasePath) + ) } Fox.successful(usableDataSource.copy(dataLayers = updatedDataLayers)) case None => Fox.failure("Cannot replace paths of unusable datasource") } - } private def resolveAttachmentsAndAddScanned(dataSourcePath: Path, dataSource: UsableDataSource) = dataSource.dataLayers.map(dataLayer => { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 9369e0341c8..ef13652db90 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -2,7 +2,6 @@ package com.scalableminds.webknossos.datastore.services.uploading import com.google.inject.Inject import com.scalableminds.util.accesscontext.TokenContext -import com.scalableminds.util.io.PathUtils.ensureDirectoryBox import com.scalableminds.util.io.{PathUtils, ZipIO} import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant @@ -17,7 +16,7 @@ import com.scalableminds.webknossos.datastore.datareaders.zarr.NgffMetadata.FILE import com.scalableminds.webknossos.datastore.datareaders.zarr.ZarrHeader.FILENAME_DOT_ZARRAY import com.scalableminds.webknossos.datastore.datavault.S3DataVault import com.scalableminds.webknossos.datastore.explore.ExploreLocalLayerService -import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, DirectoryConstants} +import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, DirectoryConstants, UPath} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.UsableDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON import com.scalableminds.webknossos.datastore.models.datasource._ @@ -175,7 +174,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) - def getDatasetIdByUploadId(uploadId: String): Fox[ObjectId] = + private def getDatasetIdByUploadId(uploadId: String): Fox[ObjectId] = getObjectFromRedis[ObjectId](redisKeyForDatasetId(uploadId)) def reserveUpload(reserveUploadInfo: ReserveUploadInformation, @@ -395,9 +394,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"Upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3", logger) endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost - s3DataSource <- dataSourceService.prependAllPaths(dataSource, - newBasePath = - s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey") + newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox + s3DataSource <- dataSourceService.resolvePathsInNewBasePath(dataSource, newBasePath) _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId) datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox _ = this.synchronized { @@ -572,7 +570,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, .flatten .toSet neededPaths = usableDataSource.dataLayers - .flatMap(layer => layer.allExplicitPaths) + .flatMap(layer 
=> layer.allExplicitPaths.flatMap(_.toLocalPath)) .map(dataDir.resolve) .toSet ++ explicitPaths allFiles <- PathUtils.listFilesRecursive(dataDir, silent = true, maxDepth = 10).toFox From 18a9fbd2f7b7cece4ad3179183865c39c5ff1d03 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 24 Sep 2025 11:55:28 +0200 Subject: [PATCH 16/62] WIP adapt upload to use datasetId where possible --- .../WKRemoteDataStoreController.scala | 6 +- app/models/dataset/DatasetService.scala | 6 +- .../controllers/DataSourceController.scala | 55 ++++++++----------- .../services/AccessTokenService.scala | 13 +---- .../services/DSRemoteWebknossosClient.scala | 28 ++++------ .../services/uploading/UploadService.scala | 39 ++++++------- 6 files changed, 64 insertions(+), 83 deletions(-) diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 05304bfcb43..3725a329269 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.tools.{Fox, Full} import com.scalableminds.webknossos.datastore.controllers.JobExportProperties import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload -import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, DataSource} +import com.scalableminds.webknossos.datastore.models.datasource.DataSource import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus} import com.scalableminds.webknossos.datastore.services.uploading.{ LegacyLinkedLayerIdentifier, @@ -243,11 +243,11 @@ class WKRemoteDataStoreController @Inject()( /** * Called by the datastore after a dataset has been deleted on disk. */ - def deleteDataset(name: String, key: String): Action[DataSourceId] = Action.async(validateJson[DataSourceId]) { + def deleteDataset(name: String, key: String): Action[ObjectId] = Action.async(validateJson[ObjectId]) { implicit request => dataStoreService.validateAccess(name, key) { _ => for { - existingDatasetBox <- datasetDAO.findOneByDataSourceId(request.body)(GlobalAccessContext).shiftBox + existingDatasetBox <- datasetDAO.findOne(request.body)(GlobalAccessContext).shiftBox _ <- existingDatasetBox match { case Full(dataset) => for { diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index c939cf480d6..d0d1160ff73 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -76,15 +76,15 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, _ <- Fox.fromBool(!isDatasetNameAlreadyTaken) ?~> "dataset.name.alreadyTaken" } yield () - // TODO may get isVirtual=true too. Change to createVirtualDataset? + // TODO consolidate with createVirtualDataset? 
def createPreliminaryDataset(newDatasetId: ObjectId, datasetName: String, datasetDirectoryName: String, organizationId: String, dataStore: DataStore): Fox[Dataset] = { - val unreportedDatasource = + val unusableDataSource = UnusableDataSource(DataSourceId(datasetDirectoryName, organizationId), None, DataSourceStatus.notYetUploaded) - createDataset(dataStore, newDatasetId, datasetName, unreportedDatasource) + createDataset(dataStore, newDatasetId, datasetName, unusableDataSource, isVirtual = true) } def createVirtualDataset(datasetName: String, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index b84fa2b372e..752429c9ab0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -68,7 +68,6 @@ class DataSourceController @Inject()( exploreRemoteLayerService: ExploreRemoteLayerService, uploadService: UploadService, meshFileService: MeshFileService, - dataStoreConfig: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, val dsRemoteWebknossosClient: DSRemoteWebknossosClient, val dsRemoteTracingstoreClient: DSRemoteTracingstoreClient, @@ -96,12 +95,11 @@ class DataSourceController @Inject()( UserAccessRequest.administrateDataSources(request.body.organization)) { for { isKnownUpload <- uploadService.isKnownUpload(request.body.uploadId) - shouldBeVirtual = dataStoreConfig.Datastore.S3Upload.enabled - reserveUploadInformation = request.body.copy(isVirtual = Some(shouldBeVirtual)) _ <- if (!isKnownUpload) { - (dsRemoteWebknossosClient.reserveDataSourceUpload(reserveUploadInformation) ?~> "dataset.upload.validation.failed") - .flatMap(reserveUploadAdditionalInfo => - uploadService.reserveUpload(reserveUploadInformation, reserveUploadAdditionalInfo)) + for { + reserveUploadAdditionalInfo <- dsRemoteWebknossosClient.reserveDataSourceUpload(request.body) ?~> "dataset.upload.validation.failed" + _ <- uploadService.reserveUpload(request.body, reserveUploadAdditionalInfo) + } yield () } else Fox.successful(()) } yield Ok } @@ -151,10 +149,9 @@ class DataSourceController @Inject()( success = { case (chunkNumber, chunkSize, currentChunkSize, totalChunkCount, uploadFileId) => for { - dataSourceId <- uploadService.getDataSourceIdByUploadId( - uploadService.extractDatasetUploadId(uploadFileId)) ?~> "dataset.upload.validation.failed" - result <- accessTokenService.validateAccessFromTokenContext( - UserAccessRequest.writeDataSource(dataSourceId)) { + datasetId <- uploadService + .getDatasetIdByUploadId(uploadService.extractDatasetUploadId(uploadFileId)) ?~> "dataset.upload.validation.failed" + result <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { for { isKnownUpload <- uploadService.isKnownUploadByFileId(uploadFileId) _ <- Fox.fromBool(isKnownUpload) ?~> "dataset.upload.validation.failed" @@ -175,9 +172,8 @@ class DataSourceController @Inject()( def testChunk(resumableChunkNumber: Int, resumableIdentifier: String): Action[AnyContent] = Action.async { implicit request => for { - dataSourceId <- uploadService.getDataSourceIdByUploadId( - uploadService.extractDatasetUploadId(resumableIdentifier)) ?~> "dataset.upload.validation.failed" - result <- 
accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataSource(dataSourceId)) { + datasetId <- uploadService.getDatasetIdByUploadId(uploadService.extractDatasetUploadId(resumableIdentifier)) ?~> "dataset.upload.validation.failed" + result <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { for { isKnownUpload <- uploadService.isKnownUploadByFileId(resumableIdentifier) _ <- Fox.fromBool(isKnownUpload) ?~> "dataset.upload.validation.failed" @@ -190,19 +186,16 @@ class DataSourceController @Inject()( def finishUpload(): Action[UploadInformation] = Action.async(validateJson[UploadInformation]) { implicit request => log() { for { - dataSourceId <- uploadService - .getDataSourceIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" - response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataSource(dataSourceId)) { + datasetId <- uploadService.getDatasetIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" + response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { for { - (dataSourceId, datasetSizeBytes) <- uploadService - .finishUpload(request.body) ?~> "dataset.upload.finishFailed" - uploadedDatasetIdJson <- dsRemoteWebknossosClient.reportUpload( - dataSourceId, + (datasetId, datasetSizeBytes) <- uploadService.finishUpload(request.body) ?~> "dataset.upload.finishFailed" + _ <- dsRemoteWebknossosClient.reportUpload( + datasetId, datasetSizeBytes, - request.body.needsConversion.getOrElse(false), - viaAddRoute = false + request.body.needsConversion.getOrElse(false) ) ?~> "reportUpload.failed" - } yield Ok(Json.obj("newDatasetId" -> uploadedDatasetIdJson)) + } yield Ok(Json.obj("newDatasetId" -> datasetId)) } } yield response } @@ -210,14 +203,14 @@ class DataSourceController @Inject()( def cancelUpload(): Action[CancelUploadInformation] = Action.async(validateJson[CancelUploadInformation]) { implicit request => - val dataSourceIdFox = uploadService.isKnownUpload(request.body.uploadId).flatMap { + val datasetIdFox = uploadService.isKnownUpload(request.body.uploadId).flatMap { case false => Fox.failure("dataset.upload.validation.failed") - case true => uploadService.getDataSourceIdByUploadId(request.body.uploadId) + case true => uploadService.getDatasetIdByUploadId(request.body.uploadId) } - dataSourceIdFox.flatMap { dataSourceId => - accessTokenService.validateAccessFromTokenContext(UserAccessRequest.deleteDataSource(dataSourceId)) { + datasetIdFox.flatMap { datasetId => + accessTokenService.validateAccessFromTokenContext(UserAccessRequest.deleteDataset(datasetId)) { for { - _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) ?~> "dataset.delete.webknossos.failed" + _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) ?~> "dataset.delete.webknossos.failed" _ <- uploadService.cancelUpload(request.body) ?~> "Could not cancel the upload." 
} yield Ok } @@ -413,12 +406,12 @@ class DataSourceController @Inject()( dataSourceId.directoryName, Some(datasetId), reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" - _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) + _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) } yield () } else for { - _ <- dsRemoteWebknossosClient.deleteDataSource(dataSourceId) - _ = logger.warn(s"Tried to delete dataset ${dataSource.id} that is not on disk.") + _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) + _ = logger.warn(s"Tried to delete dataset ${dataSource.id} ($datasetId), but is not present on disk.") } yield () } yield Ok } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AccessTokenService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AccessTokenService.scala index ef68bc011b8..f04408ba6fa 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AccessTokenService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/AccessTokenService.scala @@ -31,12 +31,12 @@ case class UserAccessRequest(resourceId: DataSourceId, resourceType: AccessResou object UserAccessRequest { implicit val jsonFormat: OFormat[UserAccessRequest] = Json.format[UserAccessRequest] - def deleteDataSource(dataSourceId: DataSourceId): UserAccessRequest = - UserAccessRequest(dataSourceId, AccessResourceType.datasource, AccessMode.delete) def administrateDataSources: UserAccessRequest = UserAccessRequest(DataSourceId("", ""), AccessResourceType.datasource, AccessMode.administrate) + def administrateDataSources(organizationId: String): UserAccessRequest = UserAccessRequest(DataSourceId("", organizationId), AccessResourceType.datasource, AccessMode.administrate) + def readDataSources(dataSourceId: DataSourceId): UserAccessRequest = UserAccessRequest(dataSourceId, AccessResourceType.datasource, AccessMode.read) @@ -49,9 +49,6 @@ object UserAccessRequest { def deleteDataset(datasetId: ObjectId): UserAccessRequest = UserAccessRequest(DataSourceId(datasetId.toString, ""), AccessResourceType.dataset, AccessMode.delete) - def writeDataSource(dataSourceId: DataSourceId): UserAccessRequest = - UserAccessRequest(dataSourceId, AccessResourceType.datasource, AccessMode.write) - def writeDataset(datasetId: ObjectId): UserAccessRequest = UserAccessRequest(DataSourceId(datasetId.toString, ""), AccessResourceType.dataset, AccessMode.write) @@ -99,12 +96,6 @@ trait AccessTokenService { accessAnswersCache.getOrLoad((accessRequest, tc.userTokenOpt), _ => remoteWebknossosClient.requestUserAccess(accessRequest)) - def assertUserAccess(accessRequest: UserAccessRequest)(implicit ec: ExecutionContext, tc: TokenContext): Fox[Unit] = - for { - userAccessAnswer <- hasUserAccess(accessRequest) ?~> "Failed to check data access, token may be expired, consider reloading." 
- _ <- Fox.fromBool(userAccessAnswer.granted) ?~> userAccessAnswer.msg.getOrElse("Access forbidden.") - } yield () - private def executeBlockOnPositiveAnswer(userAccessAnswer: UserAccessAnswer, block: => Future[Result]): Future[Result] = userAccessAnswer match { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala index d9189d9251f..ea8dfda4a3b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.accesscontext.TokenContext import com.scalableminds.util.cache.AlfuCache import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.objectid.ObjectId -import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} +import com.scalableminds.util.tools.{Fox, FoxImplicits} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.controllers.JobExportProperties import com.scalableminds.webknossos.datastore.helpers.{IntervalScheduler, LayerMagLinkInfo, UPath} @@ -94,19 +94,15 @@ class DSRemoteWebknossosClient @Inject()( .getWithJsonResponse[List[UnfinishedUpload]] } yield unfinishedUploads - def reportUpload(dataSourceId: DataSourceId, datasetSizeBytes: Long, needsConversion: Boolean, viaAddRoute: Boolean)( - implicit tc: TokenContext): Fox[String] = - for { - uploadedDatasetIdJson <- rpc(s"$webknossosUri/api/datastores/$dataStoreName/reportDatasetUpload") - .addQueryString("key" -> dataStoreKey) - .addQueryString("datasetDirectoryName" -> dataSourceId.directoryName) - .addQueryString("needsConversion" -> needsConversion.toString) - .addQueryString("viaAddRoute" -> viaAddRoute.toString) - .addQueryString("datasetSizeBytes" -> datasetSizeBytes.toString) - .withTokenFromContext - .postEmptyWithJsonResponse[JsValue]() - uploadedDatasetId <- JsonHelper.as[String](uploadedDatasetIdJson \ "id").toFox ?~> "uploadedDatasetId.invalid" - } yield uploadedDatasetId + def reportUpload(datasetId: ObjectId, datasetSizeBytes: Long, needsConversion: Boolean)( + implicit tc: TokenContext): Fox[_] = + rpc(s"$webknossosUri/api/datastores/$dataStoreName/reportDatasetUpload") + .addQueryString("key" -> dataStoreKey) + .addQueryString("datasetId" -> datasetId.toString) + .addQueryString("needsConversion" -> needsConversion.toString) + .addQueryString("datasetSizeBytes" -> datasetSizeBytes.toString) + .withTokenFromContext + .postEmptyWithJsonResponse[JsValue]() def reportDataSources(dataSources: List[DataSource], organizationId: Option[String]): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources") @@ -141,10 +137,10 @@ class DSRemoteWebknossosClient @Inject()( .withTokenFromContext .putJson(dataSource) - def deleteDataSource(id: DataSourceId): Fox[_] = + def deleteDataset(datasetId: ObjectId): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/deleteDataset") .addQueryString("key" -> dataStoreKey) - .postJson(id) + .postJson(datasetId) def getJobExportProperties(jobId: String): Fox[JobExportProperties] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/jobExportProperties") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index ef13652db90..dc29f64068c 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -54,7 +54,7 @@ case class ReserveUploadInformation( initialTeams: List[ObjectId], // team ids folderId: Option[ObjectId], requireUniqueName: Option[Boolean], - isVirtual: Option[Boolean] // TODO rethink? + isVirtual: Option[Boolean] // Only set (to false) for legacy manual uploads ) object ReserveUploadInformation { implicit val reserveUploadInformation: OFormat[ReserveUploadInformation] = Json.format[ReserveUploadInformation] @@ -126,6 +126,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, * uploadId -> fileCount * uploadId -> set(fileName) * uploadId -> dataSourceId + * uploadId -> datasetId * uploadId -> linkedLayerIdentifier * uploadId#fileName -> totalChunkCount * uploadId#fileName -> set(chunkIndices) @@ -171,10 +172,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def s3UploadDirectory(organizationId: String, uploadId: String): Path = dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadToS3Dir).resolve(uploadId) - def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = + private def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) - private def getDatasetIdByUploadId(uploadId: String): Fox[ObjectId] = + def getDatasetIdByUploadId(uploadId: String): Fox[ObjectId] = getObjectFromRedis[ObjectId](redisKeyForDatasetId(uploadId)) def reserveUpload(reserveUploadInfo: ReserveUploadInformation, @@ -212,7 +213,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, Json.stringify(Json.toJson(LinkedLayerIdentifiers(reserveUploadAdditionalInfo.layersToLink))) ) _ = logger.info( - f"Reserving dataset upload of ${reserveUploadInfo.organization}/${reserveUploadInfo.name} with id ${reserveUploadInfo.uploadId}...") + f"Reserving dataset upload of ${reserveUploadInfo.organization}/${reserveUploadInfo.name} with uploadId ${reserveUploadInfo.uploadId}...") } yield () def addUploadIdsToUnfinishedUploads( @@ -274,7 +275,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, chunkFile: File): Fox[Unit] = { val uploadId = extractDatasetUploadId(uploadFileId) for { - dataSourceId <- getDataSourceIdByUploadId(uploadId) + datasetId <- getDatasetIdByUploadId(uploadId) (filePath, uploadDir) <- getFilePathAndDirOfUploadId(uploadFileId) isFileKnown <- runningUploadMetadataStore.contains(redisKeyForFileChunkCount(uploadId, filePath)) totalFileSizeInBytesOpt <- runningUploadMetadataStore.findLong(redisKeyForTotalFileSizeInBytes(uploadId)) @@ -316,7 +317,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, runningUploadMetadataStore.removeFromSet(redisKeyForFileChunkSet(uploadId, filePath), String.valueOf(currentChunkNumber)) val errorMsg = - s"Error receiving chunk $currentChunkNumber for upload ${dataSourceId.directoryName}: ${e.getMessage}" + s"Error receiving chunk $currentChunkNumber for uploadId $uploadId (datasetId $datasetId): ${e.getMessage}" logger.warn(errorMsg) Fox.failure(errorMsg) } @@ -327,22 +328,19 @@ class UploadService @Inject()(dataSourceService: DataSourceService, val uploadId = cancelUploadInformation.uploadId for
{ dataSourceId <- getDataSourceIdByUploadId(uploadId) + datasetId <- getDatasetIdByUploadId(uploadId) knownUpload <- isKnownUpload(uploadId) } yield if (knownUpload) { - logger.info( - f"Cancelling dataset upload of ${dataSourceId.organizationId}/${dataSourceId.directoryName} with id $uploadId...") + logger.info(f"Cancelling dataset upload of uploadId $uploadId (datasetId $datasetId)...") for { _ <- removeFromRedis(uploadId) _ <- PathUtils.deleteDirectoryRecursively(uploadDirectory(dataSourceId.organizationId, uploadId)).toFox } yield () - } else { - Fox.failure(s"Unknown upload") - } + } else Fox.failure(s"Unknown upload") } - def finishUpload(uploadInformation: UploadInformation, checkCompletion: Boolean = true)( - implicit tc: TokenContext): Fox[(DataSourceId, Long)] = { + def finishUpload(uploadInformation: UploadInformation)(implicit tc: TokenContext): Fox[(ObjectId, Long)] = { val uploadId = uploadInformation.uploadId for { @@ -362,7 +360,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ = logger.info( s"Finishing dataset upload of ${dataSourceId.organizationId}/${dataSourceId.directoryName} with id $uploadId...") - _ <- Fox.runIf(checkCompletion)(ensureAllChunksUploaded(uploadId)) + _ <- checkAllChunksUploaded(uploadId) unpackToDir = dataSourceDirFor(dataSourceId, datasetNeedsConversion, uploadToS3) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" @@ -371,6 +369,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetId <- getDatasetIdByUploadId(uploadId) _ <- cleanUpUploadedDataset(uploadDir, uploadId) _ <- cleanUpOnFailure(unpackResult, + datasetId, dataSourceId, datasetNeedsConversion, label = s"unpacking to dataset to $unpackToDir") @@ -379,6 +378,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, dataSourceId, linkedLayerInfo.layersToLink).shiftBox _ <- cleanUpOnFailure(postProcessingResult, + datasetId, dataSourceId, datasetNeedsConversion, label = s"processing dataset at $unpackToDir") @@ -408,7 +408,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox } yield datasetSize } - } yield (dataSourceId, datasetSizeBytes) + } yield (datasetId, datasetSizeBytes) } private def postProcessUploadedDataSource(datasetNeedsConversion: Boolean, @@ -578,6 +578,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } yield filesToDelete private def cleanUpOnFailure[T](result: Box[T], + datasetId: ObjectId, dataSourceId: DataSourceId, datasetNeedsConversion: Boolean, label: String): Fox[Unit] = @@ -598,13 +599,13 @@ class UploadService @Inject()(dataSourceService: DataSourceService, None, datasetNeedsConversion, Some("the upload failed")) - remoteWebknossosClient.deleteDataSource(dataSourceId) + remoteWebknossosClient.deleteDataset(datasetId) for { _ <- result.toFox ?~> f"Error while $label" } yield () } - private def ensureAllChunksUploaded(uploadId: String): Fox[Unit] = + private def checkAllChunksUploaded(uploadId: String): Fox[Unit] = for { fileCountStringOpt <- runningUploadMetadataStore.find(redisKeyForFileCount(uploadId)) fileCountString <- fileCountStringOpt.toFox ?~> "dataset.upload.noFiles" @@ -827,9 +828,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def cleanUpDatasetExceedingSize(uploadDir: Path, uploadId: String): Fox[Unit] = for { - dataSourceId <- getDataSourceIdByUploadId(uploadId) + 
datasetId <- getDatasetIdByUploadId(uploadId) _ <- cleanUpUploadedDataset(uploadDir, uploadId) - _ <- remoteWebknossosClient.deleteDataSource(dataSourceId) + _ <- remoteWebknossosClient.deleteDataset(datasetId) } yield () private def removeFromRedis(uploadId: String): Fox[Unit] = From b021cd8af7e87988911ba0e85539dd3f93420252 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 24 Sep 2025 13:59:53 +0200 Subject: [PATCH 17/62] move to target, pass linked layers to wk side --- .../controllers/DataSourceController.scala | 8 +- .../helpers/DirectoryConstants.scala | 2 +- .../models/datasource/DataSourceStatus.scala | 1 + .../services/DataSourceService.scala | 6 +- .../uploading/DatasetSymlinkService.scala | 31 ---- .../services/uploading/UploadService.scala | 170 +++++++----------- 6 files changed, 71 insertions(+), 147 deletions(-) delete mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/DatasetSymlinkService.scala diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 752429c9ab0..e2eb7cfa16a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -6,7 +6,6 @@ import com.scalableminds.util.geometry.Vec3Int import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Box, Empty, Failure, Fox, FoxImplicits, Full} -import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.ListOfLong.ListOfLong import com.scalableminds.webknossos.datastore.explore.{ ExploreRemoteDatasetRequest, @@ -189,12 +188,7 @@ class DataSourceController @Inject()( datasetId <- uploadService.getDatasetIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { for { - (datasetId, datasetSizeBytes) <- uploadService.finishUpload(request.body) ?~> "dataset.upload.finishFailed" - _ <- dsRemoteWebknossosClient.reportUpload( - datasetId, - datasetSizeBytes, - request.body.needsConversion.getOrElse(false) - ) ?~> "reportUpload.failed" + datasetId <- uploadService.finishUpload(request.body) ?~> "dataset.upload.finishFailed" } yield Ok(Json.obj("newDatasetId" -> datasetId)) } } yield response diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala index 0f9f9366023..710e0c231e2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DirectoryConstants.scala @@ -4,5 +4,5 @@ trait DirectoryConstants { val forConversionDir = ".forConversion" val trashDir = ".trash" val uploadingDir: String = ".uploading" - val uploadToS3Dir = ".cloud" + val unpackedDir = ".unpacked" } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataSourceStatus.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataSourceStatus.scala index 4bb7eb053da..e3fe10395cc 100644 --- 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataSourceStatus.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/models/datasource/DataSourceStatus.scala @@ -5,6 +5,7 @@ object DataSourceStatus { val deletedByUser: String = "Deleted by user." val notYetUploaded = "Not yet fully uploaded." val notYetUploadedToPaths = "Not yet marked as fully uploaded to paths." + val notImportedYet: String = "Not imported yet." val unreportedStatusList: Seq[String] = List(unreported, deletedByUser) val inactiveStatusList: Seq[String] = List(unreported, notYetUploaded, notYetUploadedToPaths, deletedByUser) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index d174da7d6df..efef9384d27 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -202,9 +202,9 @@ class DataSourceService @Inject()( } } - def exploreMappings(organizationId: String, datasetName: String, dataLayerName: String): Set[String] = + def exploreMappings(organizationId: String, datasetDirectoryName: String, dataLayerName: String): Set[String] = MappingProvider - .exploreMappings(dataBaseDir.resolve(organizationId).resolve(datasetName).resolve(dataLayerName)) + .exploreMappings(dataBaseDir.resolve(organizationId).resolve(datasetDirectoryName).resolve(dataLayerName)) .getOrElse(Set()) private def scanOrganizationDirForDataSources(path: Path): List[DataSource] = { @@ -246,7 +246,7 @@ class DataSourceService @Inject()( existingDataSourceProperties = JsonHelper.parseFromFile(propertiesFile, path).toOption) } } else { - UnusableDataSource(id, None, "Not imported yet.") + UnusableDataSource(id, None, DataSourceStatus.notImportedYet) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/DatasetSymlinkService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/DatasetSymlinkService.scala deleted file mode 100644 index d1d9eba8ecd..00000000000 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/DatasetSymlinkService.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.scalableminds.webknossos.datastore.services.uploading - -import com.scalableminds.util.tools.{Fox, FoxImplicits} -import com.scalableminds.webknossos.datastore.services.DataSourceService -import com.scalableminds.util.tools.Box.tryo - -import java.nio.file.{Files, Path} -import javax.inject.Inject -import scala.concurrent.ExecutionContext - -class DatasetSymlinkService @Inject()(dataSourceService: DataSourceService)(implicit ec: ExecutionContext) - extends FoxImplicits { - - val dataBaseDir: Path = dataSourceService.dataBaseDir - def addSymlinksToOtherDatasetLayers(datasetDir: Path, layersToLink: List[LegacyLinkedLayerIdentifier]): Fox[Unit] = - Fox - .serialCombined(layersToLink) { layerToLink => - val layerPath = layerToLink.pathIn(dataBaseDir) - val newLayerPath = datasetDir.resolve(layerToLink.newLayerName.getOrElse(layerToLink.layerName)) - for { - _ <- Fox.fromBool(!Files.exists(newLayerPath)) ?~> s"Cannot symlink layer at $newLayerPath: a layer with this name already exists." 
- _ <- Fox.fromBool(Files.exists(layerPath)) ?~> s"Cannot symlink to layer at $layerPath: The layer does not exist." - _ <- tryo { - Files.createSymbolicLink(newLayerPath, newLayerPath.getParent.relativize(layerPath)) - }.toFox ?~> s"Failed to create symlink at $newLayerPath." - } yield () - } - .map { _ => - () - } -} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index dc29f64068c..cefcac380cb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -113,7 +113,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, runningUploadMetadataStore: DataStoreRedisStore, remoteSourceDescriptorService: RemoteSourceDescriptorService, exploreLocalLayerService: ExploreLocalLayerService, - datasetSymlinkService: DatasetSymlinkService, dataStoreConfig: DataStoreConfig, val remoteWebknossosClient: DSRemoteWebknossosClient)(implicit ec: ExecutionContext) extends DatasetDeleter @@ -169,9 +168,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def uploadDirectory(organizationId: String, uploadId: String): Path = dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadId) - private def s3UploadDirectory(organizationId: String, uploadId: String): Path = - dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadToS3Dir).resolve(uploadId) - private def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) @@ -340,15 +336,8 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else Fox.failure(s"Unknown upload") } - def finishUpload(uploadInformation: UploadInformation)(implicit tc: TokenContext): Fox[(ObjectId, Long)] = { - val uploadId = uploadInformation.uploadId - + private def assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir: Path, uploadId: String): Fox[Unit] = for { - dataSourceId <- getDataSourceIdByUploadId(uploadId) - datasetNeedsConversion = uploadInformation.needsConversion.getOrElse(false) - uploadDir = uploadDirectory(dataSourceId.organizationId, uploadId) - uploadToS3 = dataStoreConfig.Datastore.S3Upload.enabled - totalFileSizeInBytesOpt <- runningUploadMetadataStore.find(redisKeyForTotalFileSizeInBytes(uploadId)) _ <- Fox.runOptional(totalFileSizeInBytesOpt) { maxFileSize => tryo(FileUtils.sizeOfDirectoryAsBigInteger(uploadDir.toFile).longValue).toFox.map(actualFileSize => @@ -357,64 +346,80 @@ class UploadService @Inject()(dataSourceService: DataSourceService, Fox.failure(s"Uploaded dataset exceeds the maximum allowed size of $maxFileSize bytes") } else Fox.successful(())) } + } yield () - _ = logger.info( - s"Finishing dataset upload of ${dataSourceId.organizationId}/${dataSourceId.directoryName} with id $uploadId...") + def finishUpload(uploadInformation: UploadInformation)(implicit tc: TokenContext): Fox[ObjectId] = { + val uploadId = uploadInformation.uploadId + + for { + dataSourceId <- getDataSourceIdByUploadId(uploadId) + datasetId <- getDatasetIdByUploadId(uploadId) + _ = logger.info(s"Finishing dataset upload $uploadId of datasetId $datasetId ($dataSourceId)...") + needsConversion = uploadInformation.needsConversion.getOrElse(false) + uploadDir = 
uploadDirectory(dataSourceId.organizationId, uploadId) + isS3UploadEnabled = dataStoreConfig.Datastore.S3Upload.enabled + + _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) _ <- checkAllChunksUploaded(uploadId) - unpackToDir = dataSourceDirFor(dataSourceId, datasetNeedsConversion, uploadToS3) + unpackToDir = unpackToDirFor(dataSourceId, needsConversion) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox - linkedLayerInfo <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) - datasetId <- getDatasetIdByUploadId(uploadId) _ <- cleanUpUploadedDataset(uploadDir, uploadId) _ <- cleanUpOnFailure(unpackResult, datasetId, dataSourceId, - datasetNeedsConversion, + needsConversion, label = s"unpacking to dataset to $unpackToDir") - postProcessingResult <- postProcessUploadedDataSource(datasetNeedsConversion, - unpackToDir, - dataSourceId, - linkedLayerInfo.layersToLink).shiftBox + postProcessingResult <- Fox + .runIf(!needsConversion)(postProcessUploadedDataSource(needsConversion, unpackToDir, dataSourceId)) + .shiftBox _ <- cleanUpOnFailure(postProcessingResult, datasetId, dataSourceId, - datasetNeedsConversion, + needsConversion, label = s"processing dataset at $unpackToDir") dataSource = dataSourceService.dataSourceFromDir(unpackToDir, dataSourceId.organizationId) - datasetSizeBytes: Long <- if (uploadToS3 && !datasetNeedsConversion) { - for { - _ <- Fox.successful(()) - beforeS3Upload = Instant.now - s3UploadBucket <- s3UploadBucketOpt.toFox - s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" - _ <- uploadDirectoryToS3(unpackToDir, dataSource, s3UploadBucket, s3ObjectKey) - _ = Instant.logSince(beforeS3Upload, - s"Upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3", - logger) - endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost - newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox - s3DataSource <- dataSourceService.resolvePathsInNewBasePath(dataSource, newBasePath) - _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId) - datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox - _ = this.synchronized { - PathUtils.deleteDirectoryRecursively(unpackToDir) - } - } yield datasetSize - } else { - for { - _ <- remoteWebknossosClient.reportDataSource(dataSource) - datasetSize <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox - } yield datasetSize - } - } yield (datasetId, datasetSizeBytes) + datasetSizeBytes <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox + dataSourceWithAbsolutePathsOpt <- moveUnpackedToTarget(unpackToDir) + + linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) + _ <- remoteWebknossosClient.reportUpload( + datasetId, + datasetSizeBytes, + dataSourceWithAbsolutePathsOpt, + linkedLayerIdentifiers, + uploadInformation.needsConversion.getOrElse(false) + ) ?~> "reportUpload.failed" + } yield datasetId } + private def moveUnpackedToTarget(path: Path, needsConversion: Boolean, dataSourceId: DataSourceId): Fox[DataSource] = + // TODO move either to S3, to orgaDir/directoryName locally or to forConversion.
Adapt paths + if (!needsConversion) { + + for { + _ <- Fox.successful(()) + s3UploadBucket <- s3UploadBucketOpt.toFox + beforeS3Upload = Instant.now + s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" + _ <- uploadDirectoryToS3(unpackToDir, dataSource, s3UploadBucket, s3ObjectKey) + _ = Instant.logSince(beforeS3Upload, + s"Upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3", + logger) + endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost + newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox + s3DataSource <- dataSourceService.resolvePathsInNewBasePath(dataSource, newBasePath) + _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId) + _ = this.synchronized { + PathUtils.deleteDirectoryRecursively(unpackToDir) + } + } yield () + } else Fox.successful(()) + private def postProcessUploadedDataSource(datasetNeedsConversion: Boolean, unpackToDir: Path, - dataSourceId: DataSourceId, - layersToLink: Option[List[LegacyLinkedLayerIdentifier]]): Fox[Unit] = + dataSourceId: DataSourceId): Fox[Unit] = if (datasetNeedsConversion) Fox.successful(()) else { @@ -432,10 +437,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, tryExploringMultipleLayers(unpackToDir, dataSourceId, uploadedDataSourceType) case UploadedDataSourceType.WKW => addLayerAndMagDirIfMissing(unpackToDir).toFox } - _ <- datasetSymlinkService.addSymlinksToOtherDatasetLayers(unpackToDir, layersToLink.getOrElse(List.empty)) - _ <- addLinkedLayersToDataSourceProperties(unpackToDir, - dataSourceId.organizationId, - layersToLink.getOrElse(List.empty)) } yield () } @@ -580,7 +581,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def cleanUpOnFailure[T](result: Box[T], datasetId: ObjectId, dataSourceId: DataSourceId, - datasetNeedsConversion: Boolean, + needsConversion: Boolean, label: String): Fox[Unit] = result match { case Full(_) => @@ -589,7 +590,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, deleteOnDisk(dataSourceId.organizationId, dataSourceId.directoryName, None, - datasetNeedsConversion, + needsConversion, Some("the upload failed")) Fox.failure(s"Unknown error $label") case Failure(msg, e, _) => @@ -597,7 +598,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, deleteOnDisk(dataSourceId.organizationId, dataSourceId.directoryName, None, - datasetNeedsConversion, + needsConversion, Some("the upload failed")) remoteWebknossosClient.deleteDataset(datasetId) for { @@ -623,53 +624,12 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- Fox.fromBool(list.forall(identity)) } yield () - private def dataSourceDirFor(dataSourceId: DataSourceId, - datasetNeedsConversion: Boolean, - uploadToS3: Boolean): Path = { - val dataSourceDir = { - if (datasetNeedsConversion) - dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) - else { - if (uploadToS3) - s3UploadDirectory(dataSourceId.organizationId, dataSourceId.directoryName) - else - dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) - } - } - - dataSourceDir - } - - private def addLinkedLayersToDataSourceProperties(unpackToDir: Path, - organizationId: String, - layersToLink: List[LegacyLinkedLayerIdentifier]): Fox[Unit] = - if (layersToLink.isEmpty) { - Fox.successful(()) - } else { - val dataSource = 
dataSourceService.dataSourceFromDir(unpackToDir, organizationId) - for { - dataSourceUsable <- dataSource.toUsable.toFox ?~> "Uploaded dataset has no valid properties file, cannot link layers" - layers <- Fox.serialCombined(layersToLink)(layerFromIdentifier) - dataSourceWithLinkedLayers = dataSourceUsable.copy(dataLayers = dataSourceUsable.dataLayers ::: layers) - _ <- dataSourceService.updateDataSourceOnDisk(dataSourceWithLinkedLayers, - expectExisting = true, - validate = true) ?~> "Could not write combined properties file" - } yield () - } - - private def layerFromIdentifier(layerIdentifier: LegacyLinkedLayerIdentifier): Fox[StaticLayer] = { - val dataSourcePath = layerIdentifier.pathIn(dataBaseDir).getParent - val dataSource = dataSourceService.dataSourceFromDir(dataSourcePath, layerIdentifier.getOrganizationId) - for { - usableDataSource <- dataSource.toUsable.toFox ?~> "Layer to link is not in dataset with valid properties file." - layer: StaticLayer <- usableDataSource.getDataLayer(layerIdentifier.layerName).toFox - newName = layerIdentifier.newLayerName.getOrElse(layerIdentifier.layerName) - layerRenamed: StaticLayer <- layer match { - case l: StaticColorLayer => Fox.successful(l.copy(name = newName)) - case l: StaticSegmentationLayer => Fox.successful(l.copy(name = newName)) - case _ => Fox.failure("Unknown layer type for link") - } - } yield layerRenamed + private def unpackToDirFor(dataSourceId: DataSourceId, datasetNeedsConversion: Boolean): Path = { + val orgaDir = dataBaseDir.resolve(dataSourceId.organizationId) + if (datasetNeedsConversion) + orgaDir.resolve(forConversionDir).resolve(dataSourceId.directoryName) + else + orgaDir.resolve(uploadingDir).resolve(unpackedDir).resolve(dataSourceId.directoryName) } private def guessTypeOfUploadedDataSource(dataSourceDir: Path): UploadedDataSourceType.Value = From 2fbe5245beb3fa9f59c30c5c2563aaef44ef8903 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 11:31:35 +0200 Subject: [PATCH 18/62] move to target depending on config, pass datasource to wk side, use new layerToLink format --- app/controllers/DatasetController.scala | 7 +- .../WKRemoteDataStoreController.scala | 58 ++--- .../dataset/DatasetUploadToPathsService.scala | 6 +- conf/webknossos.latest.routes | 2 +- .../controllers/LegacyController.scala | 20 ++ .../services/DSRemoteWebknossosClient.scala | 10 +- .../services/uploading/UploadService.scala | 225 +++++++++--------- 7 files changed, 164 insertions(+), 164 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index 1b358acd27a..b3d941c3649 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -15,6 +15,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ LayerAttachmentType, UsableDataSource } +import com.scalableminds.webknossos.datastore.services.uploading.LinkedLayerIdentifier import mail.{MailchimpClient, MailchimpTag} import models.analytics.{AnalyticsService, ChangeDatasetSettingsEvent, OpenDatasetEvent} import models.dataset._ @@ -54,12 +55,6 @@ object DatasetUpdateParameters extends TristateOptionJsonHelper { Json.configured(tristateOptionParsing).format[DatasetUpdateParameters] } -case class LinkedLayerIdentifier(datasetId: ObjectId, layerName: String, newLayerName: Option[String] = None) - -object LinkedLayerIdentifier { - implicit val jsonFormat: OFormat[LinkedLayerIdentifier] = Json.format[LinkedLayerIdentifier] -} - case class ReserveDatasetUploadToPathsRequest( datasetName: 
String, layersToLink: Seq[LinkedLayerIdentifier], diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 3725a329269..d14de3c0f35 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -10,7 +10,8 @@ import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.DataSource import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus} import com.scalableminds.webknossos.datastore.services.uploading.{ - LegacyLinkedLayerIdentifier, + LinkedLayerIdentifier, + ReportDatasetUploadParameters, ReserveAdditionalInformation, ReserveUploadInformation } @@ -81,12 +82,10 @@ class WKRemoteDataStoreController @Inject()( _ <- Fox.fromBool(dataStore.onlyAllowedOrganization.forall(_ == organization._id)) ?~> "dataset.upload.Datastore.restricted" folderId = uploadInfo.folderId.getOrElse(organization._rootFolder) _ <- folderDAO.assertUpdateAccess(folderId)(AuthorizedAccessContext(user)) ?~> "folder.noWriteAccess" - layersToLinkWithDirectoryName <- Fox.serialCombined(uploadInfo.layersToLink.getOrElse(List.empty))(l => - validateLayerToLink(l, user)) ?~> "dataset.upload.invalidLinkedLayers" + _ <- Fox.serialCombined(uploadInfo.layersToLink.getOrElse(List.empty))(l => validateLayerToLink(l, user)) ?~> "dataset.upload.invalidLinkedLayers" newDatasetId = ObjectId.generate _ <- Fox.runIf(request.body.requireUniqueName.getOrElse(false))( datasetService.assertNewDatasetNameUnique(request.body.name, organization._id)) - // TODO pass isVirtual dataset <- datasetService.createPreliminaryDataset(newDatasetId, uploadInfo.name, datasetService.generateDirectoryName(uploadInfo.name, @@ -96,10 +95,7 @@ class WKRemoteDataStoreController @Inject()( _ <- datasetDAO.updateFolder(dataset._id, folderId)(GlobalAccessContext) _ <- datasetService.addInitialTeams(dataset, uploadInfo.initialTeams, user)(AuthorizedAccessContext(user)) _ <- datasetService.addUploader(dataset, user._id)(AuthorizedAccessContext(user)) - additionalInfo = ReserveAdditionalInformation(dataset._id, - dataset.directoryName, - if (layersToLinkWithDirectoryName.isEmpty) None - else Some(layersToLinkWithDirectoryName)) + additionalInfo = ReserveAdditionalInformation(dataset._id, dataset.directoryName) } yield Ok(Json.toJson(additionalInfo)) } } @@ -133,51 +129,35 @@ class WKRemoteDataStoreController @Inject()( } } - private def validateLayerToLink(layerIdentifier: LegacyLinkedLayerIdentifier, requestingUser: User)( - implicit ec: ExecutionContext, - m: MessagesProvider): Fox[LegacyLinkedLayerIdentifier] = + private def validateLayerToLink(layerIdentifier: LinkedLayerIdentifier, + requestingUser: User)(implicit ec: ExecutionContext, m: MessagesProvider): Fox[Unit] = for { - organization <- organizationDAO.findOne(layerIdentifier.getOrganizationId)(GlobalAccessContext) ?~> Messages( - "organization.notFound", - layerIdentifier.getOrganizationId) ~> NOT_FOUND - datasetBox <- datasetDAO - .findOneByNameAndOrganization(layerIdentifier.dataSetName, organization._id)( - AuthorizedAccessContext(requestingUser)) - .shiftBox - dataset <- datasetBox match { - case Full(ds) => Fox.successful(ds) - case _ => - ObjectId - .fromString(layerIdentifier.dataSetName) - .flatMap(interpretedAsId => datasetDAO.findOne(interpretedAsId)(AuthorizedAccessContext(requestingUser))) ?~> Messages( - "dataset.notFound", - layerIdentifier.dataSetName) - } + dataset 
<- datasetDAO.findOne(layerIdentifier.datasetId)(AuthorizedAccessContext(requestingUser)) ?~> Messages( + "dataset.notFound", + layerIdentifier.datasetId) ~> NOT_FOUND isTeamManagerOrAdmin <- userService.isTeamManagerOrAdminOfOrg(requestingUser, dataset._organization) _ <- Fox.fromBool(isTeamManagerOrAdmin || requestingUser.isDatasetManager || dataset.isPublic) ?~> "dataset.upload.linkRestricted" - } yield layerIdentifier.copy(datasetDirectoryName = Some(dataset.directoryName)) + } yield () def reportDatasetUpload(name: String, key: String, token: String, - datasetDirectoryName: String, - datasetSizeBytes: Long, - needsConversion: Boolean, - viaAddRoute: Boolean): Action[AnyContent] = - Action.async { implicit request => + datasetId: ObjectId): Action[ReportDatasetUploadParameters] = + Action.async(validateJson[ReportDatasetUploadParameters]) { implicit request => dataStoreService.validateAccess(name, key) { dataStore => for { user <- bearerTokenService.userForToken(token) - dataset <- datasetDAO.findOneByDirectoryNameAndOrganization(datasetDirectoryName, user._organization)( - GlobalAccessContext) ?~> Messages("dataset.notFound", datasetDirectoryName) ~> NOT_FOUND - _ <- Fox.runIf(!needsConversion && !viaAddRoute)(usedStorageService.refreshStorageReportForDataset(dataset)) - _ <- Fox.runIf(!needsConversion)(logUploadToSlack(user, dataset._id, viaAddRoute)) - _ = analyticsService.track(UploadDatasetEvent(user, dataset, dataStore, datasetSizeBytes)) - _ = if (!needsConversion) mailchimpClient.tagUser(user, MailchimpTag.HasUploadedOwnDataset) + dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ?~> Messages("dataset.notFound", datasetId) ~> NOT_FOUND + _ <- Fox.runIf(!request.body.needsConversion)(usedStorageService.refreshStorageReportForDataset(dataset)) + _ <- Fox.runIf(!request.body.needsConversion)(logUploadToSlack(user, dataset._id, viaAddRoute = false)) + _ = analyticsService.track(UploadDatasetEvent(user, dataset, dataStore, request.body.datasetSizeBytes)) + _ = if (!request.body.needsConversion) mailchimpClient.tagUser(user, MailchimpTag.HasUploadedOwnDataset) + // TODO update dataset in db with layersToLink } yield Ok(Json.obj("id" -> dataset._id)) } } + // TODO do this for the new add codepath private def logUploadToSlack(user: User, datasetId: ObjectId, viaAddRoute: Boolean): Fox[Unit] = for { organization <- organizationDAO.findOne(user._organization)(GlobalAccessContext) diff --git a/app/models/dataset/DatasetUploadToPathsService.scala b/app/models/dataset/DatasetUploadToPathsService.scala index 51a102fadc2..9f8dc3f677a 100644 --- a/app/models/dataset/DatasetUploadToPathsService.scala +++ b/app/models/dataset/DatasetUploadToPathsService.scala @@ -20,11 +20,11 @@ import com.scalableminds.webknossos.datastore.models.datasource.{ UsableDataSource } import com.scalableminds.webknossos.datastore.services.DataSourceValidation +import com.scalableminds.webknossos.datastore.services.uploading.LinkedLayerIdentifier import controllers.{ - LinkedLayerIdentifier, ReserveAttachmentUploadToPathRequest, - ReserveDatasetUploadToPathsRequest, - ReserveDatasetUploadToPathsForPreliminaryRequest + ReserveDatasetUploadToPathsForPreliminaryRequest, + ReserveDatasetUploadToPathsRequest } import models.organization.OrganizationDAO import models.user.User diff --git a/conf/webknossos.latest.routes b/conf/webknossos.latest.routes index 26b55f6c07f..835fb8a87be 100644 --- a/conf/webknossos.latest.routes +++ b/conf/webknossos.latest.routes @@ -132,7 +132,7 @@ PUT 
/datastores/:name/datasources/:datasetId PATCH /datastores/:name/status controllers.WKRemoteDataStoreController.statusUpdate(name: String, key: String) POST /datastores/:name/reserveUpload controllers.WKRemoteDataStoreController.reserveDatasetUpload(name: String, key: String, token: String) GET /datastores/:name/getUnfinishedUploadsForUser controllers.WKRemoteDataStoreController.getUnfinishedUploadsForUser(name: String, key: String, token: String, organizationName: String) -POST /datastores/:name/reportDatasetUpload controllers.WKRemoteDataStoreController.reportDatasetUpload(name: String, key: String, token: String, datasetDirectoryName: String, datasetSizeBytes: Long, needsConversion: Boolean, viaAddRoute: Boolean) +POST /datastores/:name/reportDatasetUpload controllers.WKRemoteDataStoreController.reportDatasetUpload(name: String, key: String, token: String, datasetId: ObjectId) POST /datastores/:name/deleteDataset controllers.WKRemoteDataStoreController.deleteDataset(name: String, key: String) GET /datastores/:name/findDatasetId controllers.WKRemoteDataStoreController.findDatasetId(name: String, key: String, datasetDirectoryName: String, organizationId: String) GET /datastores/:name/jobExportProperties controllers.WKRemoteDataStoreController.jobExportProperties(name: String, key: String, jobId: ObjectId) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala index 74fc9f6a391..e2f2f59b27f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala @@ -37,6 +37,26 @@ object LegacyReserveManualUploadInformation { Json.format[LegacyReserveManualUploadInformation] } +case class LegacyLinkedLayerIdentifier(organizationId: Option[String], + organizationName: Option[String], + // Filled by backend after identifying the dataset by name. Afterwards this updated value is stored in the redis database. 
+ datasetDirectoryName: Option[String], + dataSetName: String, + layerName: String, + newLayerName: Option[String] = None) { + + def getOrganizationId: String = this.organizationId.getOrElse(this.organizationName.getOrElse("")) +} + +object LegacyLinkedLayerIdentifier { + def apply(organizationId: String, + dataSetName: String, + layerName: String, + newLayerName: Option[String]): LegacyLinkedLayerIdentifier = + new LegacyLinkedLayerIdentifier(Some(organizationId), None, None, dataSetName, layerName, newLayerName) + implicit val jsonFormat: OFormat[LegacyLinkedLayerIdentifier] = Json.format[LegacyLinkedLayerIdentifier] +} + class LegacyController @Inject()( accessTokenService: DataStoreAccessTokenService, remoteWebknossosClient: DSRemoteWebknossosClient, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala index ea8dfda4a3b..1ec6fe45f5d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala @@ -16,13 +16,14 @@ import com.scalableminds.webknossos.datastore.models.annotation.AnnotationSource import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId} import com.scalableminds.webknossos.datastore.rpc.RPC import com.scalableminds.webknossos.datastore.services.uploading.{ + ReportDatasetUploadParameters, ReserveAdditionalInformation, ReserveUploadInformation } import com.scalableminds.webknossos.datastore.storage.DataVaultCredential import com.typesafe.scalalogging.LazyLogging import play.api.inject.ApplicationLifecycle -import play.api.libs.json.{JsValue, Json, OFormat} +import play.api.libs.json.{Json, OFormat} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ @@ -94,15 +95,12 @@ class DSRemoteWebknossosClient @Inject()( .getWithJsonResponse[List[UnfinishedUpload]] } yield unfinishedUploads - def reportUpload(datasetId: ObjectId, datasetSizeBytes: Long, needsConversion: Boolean)( - implicit tc: TokenContext): Fox[_] = + def reportUpload(datasetId: ObjectId, parameters: ReportDatasetUploadParameters)(implicit tc: TokenContext): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/reportDatasetUpload") .addQueryString("key" -> dataStoreKey) .addQueryString("datasetId" -> datasetId.toString) - .addQueryString("needsConversion" -> needsConversion.toString) - .addQueryString("datasetSizeBytes" -> datasetSizeBytes.toString) .withTokenFromContext - .postEmptyWithJsonResponse[JsValue]() + .postJson[ReportDatasetUploadParameters](parameters) def reportDataSources(dataSources: List[DataSource], organizationId: Option[String]): Fox[_] = rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources") diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index cefcac380cb..f64d17dcf5d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -50,50 +50,40 @@ case class ReserveUploadInformation( totalFileCount: Long, filePaths: Option[List[String]], totalFileSizeInBytes: 
Option[Long], - layersToLink: Option[List[LegacyLinkedLayerIdentifier]], + layersToLink: Option[List[LinkedLayerIdentifier]], initialTeams: List[ObjectId], // team ids folderId: Option[ObjectId], requireUniqueName: Option[Boolean], isVirtual: Option[Boolean] // Only set (to false) for legacy manual uploads ) object ReserveUploadInformation { - implicit val reserveUploadInformation: OFormat[ReserveUploadInformation] = Json.format[ReserveUploadInformation] + implicit val jsonFormat: OFormat[ReserveUploadInformation] = Json.format[ReserveUploadInformation] } -case class ReserveAdditionalInformation(newDatasetId: ObjectId, - directoryName: String, - layersToLink: Option[List[LegacyLinkedLayerIdentifier]]) +case class ReserveAdditionalInformation(newDatasetId: ObjectId, directoryName: String) object ReserveAdditionalInformation { - implicit val reserveAdditionalInformation: OFormat[ReserveAdditionalInformation] = + implicit val jsonFormat: OFormat[ReserveAdditionalInformation] = Json.format[ReserveAdditionalInformation] } -case class LegacyLinkedLayerIdentifier(organizationId: Option[String], - organizationName: Option[String], - // Filled by backend after identifying the dataset by name. Afterwards this updated value is stored in the redis database. - datasetDirectoryName: Option[String], - dataSetName: String, - layerName: String, - newLayerName: Option[String] = None) { - - def getOrganizationId: String = this.organizationId.getOrElse(this.organizationName.getOrElse("")) - - def pathIn(dataBaseDir: Path): Path = { - val datasetDirectoryName = this.datasetDirectoryName.getOrElse(dataSetName) - dataBaseDir.resolve(getOrganizationId).resolve(datasetDirectoryName).resolve(layerName) - } +case class ReportDatasetUploadParameters( + needsConversion: Boolean, + datasetSizeBytes: Long, + dataSourceOpt: Option[DataSource], // must be set if needsConversion is false + layersToLink: Seq[LinkedLayerIdentifier] +) +object ReportDatasetUploadParameters { + implicit val jsonFormat: OFormat[ReportDatasetUploadParameters] = + Json.format[ReportDatasetUploadParameters] } -object LegacyLinkedLayerIdentifier { - def apply(organizationId: String, - dataSetName: String, - layerName: String, - newLayerName: Option[String]): LegacyLinkedLayerIdentifier = - new LegacyLinkedLayerIdentifier(Some(organizationId), None, None, dataSetName, layerName, newLayerName) - implicit val jsonFormat: OFormat[LegacyLinkedLayerIdentifier] = Json.format[LegacyLinkedLayerIdentifier] +case class LinkedLayerIdentifier(datasetId: ObjectId, layerName: String, newLayerName: Option[String] = None) + +object LinkedLayerIdentifier { + implicit val jsonFormat: OFormat[LinkedLayerIdentifier] = Json.format[LinkedLayerIdentifier] } -case class LinkedLayerIdentifiers(layersToLink: Option[List[LegacyLinkedLayerIdentifier]]) +case class LinkedLayerIdentifiers(layersToLink: Option[List[LinkedLayerIdentifier]]) object LinkedLayerIdentifiers { implicit val jsonFormat: OFormat[LinkedLayerIdentifiers] = Json.format[LinkedLayerIdentifiers] } @@ -165,7 +155,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, def extractDatasetUploadId(uploadFileId: String): String = uploadFileId.split("/").headOption.getOrElse("") - private def uploadDirectory(organizationId: String, uploadId: String): Path = + private def uploadDirectoryFor(organizationId: String, uploadId: String): Path = dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadId) private def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = @@ -189,10 
+179,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, .insertLong(redisKeyForCurrentUploadedTotalFileSizeInBytes(reserveUploadInfo.uploadId), 0L) )) } + newDataSourceId = DataSourceId(reserveUploadAdditionalInfo.directoryName, reserveUploadInfo.organization) _ <- runningUploadMetadataStore.insert( redisKeyForDataSourceId(reserveUploadInfo.uploadId), - Json.stringify( - Json.toJson(DataSourceId(reserveUploadAdditionalInfo.directoryName, reserveUploadInfo.organization))) + Json.stringify(Json.toJson(newDataSourceId)) ) _ <- runningUploadMetadataStore.insert( redisKeyForDatasetId(reserveUploadInfo.uploadId), @@ -206,10 +196,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- runningUploadMetadataStore.insert(redisKeyForFilePaths(reserveUploadInfo.uploadId), filePaths) _ <- runningUploadMetadataStore.insert( redisKeyForLinkedLayerIdentifier(reserveUploadInfo.uploadId), - Json.stringify(Json.toJson(LinkedLayerIdentifiers(reserveUploadAdditionalInfo.layersToLink))) + Json.stringify(Json.toJson(LinkedLayerIdentifiers(reserveUploadInfo.layersToLink))) ) _ = logger.info( - f"Reserving dataset upload of ${reserveUploadInfo.organization}/${reserveUploadInfo.name} with uploadId ${reserveUploadInfo.uploadId}...") + f"Reserving dataset upload ${reserveUploadInfo.uploadId} for dataset ${reserveUploadAdditionalInfo.newDatasetId} ($newDataSourceId)...") } yield () def addUploadIdsToUnfinishedUploads( @@ -243,7 +233,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, val uploadId = extractDatasetUploadId(uploadFileId) for { dataSourceId <- getDataSourceIdByUploadId(uploadId) - uploadDir = uploadDirectory(dataSourceId.organizationId, uploadId) + uploadDir = uploadDirectoryFor(dataSourceId.organizationId, uploadId) filePathRaw = uploadFileId.split("/").tail.mkString("/") filePath = if (filePathRaw.charAt(0) == '/') filePathRaw.drop(1) else filePathRaw _ <- Fox.fromBool(!isOutsideUploadDir(uploadDir, filePath)) ?~> s"Invalid file path: $filePath" @@ -331,7 +321,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, logger.info(f"Cancelling dataset upload of uploadId $uploadId (datasetId $datasetId)...") for { _ <- removeFromRedis(uploadId) - _ <- PathUtils.deleteDirectoryRecursively(uploadDirectory(dataSourceId.organizationId, uploadId)).toFox + _ <- PathUtils.deleteDirectoryRecursively(uploadDirectoryFor(dataSourceId.organizationId, uploadId)).toFox } yield () } else Fox.failure(s"Unknown upload") } @@ -356,13 +346,11 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetId <- getDatasetIdByUploadId(uploadId) _ = logger.info(s"Finishing dataset upload $uploadId of datasetId $datasetId ($dataSourceId)...") needsConversion = uploadInformation.needsConversion.getOrElse(false) - uploadDir = uploadDirectory(dataSourceId.organizationId, uploadId) - isS3UploadEnabled = dataStoreConfig.Datastore.S3Upload.enabled - + uploadDir = uploadDirectoryFor(dataSourceId.organizationId, uploadId) _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) _ <- checkAllChunksUploaded(uploadId) - unpackToDir = unpackToDirFor(dataSourceId, needsConversion) + unpackToDir = unpackToDirFor(dataSourceId) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox _ <- cleanUpUploadedDataset(uploadDir, uploadId) @@ -372,54 +360,88 @@ class UploadService @Inject()(dataSourceService: DataSourceService, needsConversion, 
label = s"unpacking to dataset to $unpackToDir") postProcessingResult <- Fox - .runIf(!needsConversion)(postProcessUploadedDataSource(needsConversion, unpackToDir, dataSourceId)) + .runIf(!needsConversion)(exploreProcessUploadedDataSourceIfNeeded(needsConversion, unpackToDir, dataSourceId)) .shiftBox _ <- cleanUpOnFailure(postProcessingResult, datasetId, dataSourceId, needsConversion, label = s"processing dataset at $unpackToDir") - dataSource = dataSourceService.dataSourceFromDir(unpackToDir, dataSourceId.organizationId) datasetSizeBytes <- tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox - dataSourceWithAbsolutePathsOpt <- moveUnpackedToTarget(unpackToDir) - + dataSourceWithAbsolutePathsOpt <- moveUnpackedToTarget(unpackToDir, needsConversion, datasetId, dataSourceId) linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) _ <- remoteWebknossosClient.reportUpload( datasetId, - datasetSizeBytes, - dataSourceWithAbsolutePathsOpt, - linkedLayerIdentifiers, - uploadInformation.needsConversion.getOrElse(false) + ReportDatasetUploadParameters( + uploadInformation.needsConversion.getOrElse(false), + datasetSizeBytes, + dataSourceWithAbsolutePathsOpt, + linkedLayerIdentifiers.layersToLink.getOrElse(List.empty) + ) ) ?~> "reportUpload.failed" } yield datasetId } - private def moveUnpackedToTarget(path: Path, needsConversion: Boolean, dataSourceId: DataSourceId): Fox[DataSource] = - // TODO move either to S3, to orgaDir/direcctoryName locally or to forConversion. Adapt paths - if (!needsConversion) { + private def deleteFilesNotReferencedInDataSource(unpackedDir: Path, dataSource: DataSource): Fox[Unit] = + for { + filesToDelete <- findNonReferencedFiles(unpackedDir, dataSource) + _ = if (filesToDelete.nonEmpty) + logger.info(s"Uploaded dataset contains files not referenced in the datasource. Deleting $filesToDelete...") + _ = filesToDelete.foreach(file => { + try { + // TODO move to trash instead? 
+ Files.deleteIfExists(file) + } catch { + case e: Exception => + logger.warn(s"Deletion failed for non-referenced file $file of uploaded dataset: ${e.getMessage}") + } + }) + } yield () + private def moveUnpackedToTarget(unpackedDir: Path, + needsConversion: Boolean, + datasetId: ObjectId, + dataSourceId: DataSourceId): Fox[Option[DataSource]] = + if (needsConversion) { + val forConversionPath = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) for { - _ <- Fox.successful(()) - s3UploadBucket <- s3UploadBucketOpt.toFox - beforeS3Upload = Instant.now - s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/$uploadId/" - _ <- uploadDirectoryToS3(unpackToDir, dataSource, s3UploadBucket, s3ObjectKey) - _ = Instant.logSince(beforeS3Upload, - s"Upload of dataset ${dataSourceId.organizationId}/${dataSourceId.directoryName} to S3", - logger) - endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost - newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox - s3DataSource <- dataSourceService.resolvePathsInNewBasePath(dataSource, newBasePath) - _ <- remoteWebknossosClient.updateDataSource(s3DataSource, datasetId) + _ <- tryo(FileUtils.moveDirectory(unpackedDir.toFile, forConversionPath.toFile)).toFox + } yield None + } else { + for { + dataSourceFromDir <- Fox.successful( + dataSourceService.dataSourceFromDir(unpackedDir, dataSourceId.organizationId)) + _ <- deleteFilesNotReferencedInDataSource(unpackedDir, dataSourceFromDir) + newBasePath <- if (dataStoreConfig.Datastore.S3Upload.enabled) { + for { + s3UploadBucket <- s3UploadBucketOpt.toFox + beforeS3Upload = Instant.now + s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${dataSourceId.organizationId}/${dataSourceId.directoryName}/" + _ <- uploadDirectoryToS3(unpackedDir, s3UploadBucket, s3ObjectKey) + _ = Instant.logSince(beforeS3Upload, + s"Forwarding of uploaded of dataset $datasetId ($dataSourceId) to S3", + logger) + endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost + newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox + } yield newBasePath + } else { + val finalUploadedLocalPath = + dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + for { + _ <- tryo(FileUtils.moveDirectory(unpackedDir.toFile, finalUploadedLocalPath.toFile)).toFox + } yield UPath.fromLocalPath(finalUploadedLocalPath) + } + dataSourceWithAdaptedPaths <- dataSourceService.resolvePathsInNewBasePath(dataSourceFromDir, newBasePath) _ = this.synchronized { - PathUtils.deleteDirectoryRecursively(unpackToDir) + PathUtils.deleteDirectoryRecursively(unpackedDir) } - } yield () - } else Fox.successful(()) + } yield Some(dataSourceWithAdaptedPaths) + } - private def postProcessUploadedDataSource(datasetNeedsConversion: Boolean, - unpackToDir: Path, - dataSourceId: DataSourceId): Fox[Unit] = + private def exploreProcessUploadedDataSourceIfNeeded(datasetNeedsConversion: Boolean, + unpackToDir: Path, + dataSourceId: DataSourceId): Fox[Unit] = if (datasetNeedsConversion) Fox.successful(()) else { @@ -493,8 +515,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => (accessKeyId, secretAccessKey) } + private lazy val s3UploadBucketOpt: Option[String] = S3DataVault.hostBucketFromUri(new 
URI(dataStoreConfig.Datastore.S3Upload.credentialName)) + private lazy val s3UploadEndpoint: URI = { val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName) new URI( @@ -507,48 +531,32 @@ class UploadService @Inject()(dataSourceService: DataSourceService, null ) } - private lazy val s3ClientBox: Box[S3AsyncClient] = for { + + private lazy val getS3TransferManager: Box[S3TransferManager] = for { accessKeyId <- Box(s3UploadCredentialsOpt.map(_._1)) secretAccessKey <- Box(s3UploadCredentialsOpt.map(_._2)) - } yield - S3AsyncClient - .builder() - .credentialsProvider( - StaticCredentialsProvider.create( + client <- tryo( + S3AsyncClient + .builder() + .credentialsProvider(StaticCredentialsProvider.create( AwsBasicCredentials.builder.accessKeyId(accessKeyId).secretAccessKey(secretAccessKey).build() )) - .crossRegionAccessEnabled(true) - .forcePathStyle(true) - .endpointOverride(s3UploadEndpoint) - .region(Region.US_EAST_1) - // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". - .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) - .build() - - private lazy val transferManagerBox: Box[S3TransferManager] = for { - client <- s3ClientBox + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(s3UploadEndpoint) + .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". + .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) + .build()) } yield S3TransferManager.builder().s3Client(client).build() private def uploadDirectoryToS3( dataDir: Path, - dataSource: DataSource, bucketName: String, prefix: String ): Fox[Unit] = for { - _ <- Fox.successful(()) - // Delete all files in the dataDir that are not at a mag path or an attachment path, since we do not need to upload them to S3. 
- filesToDelete <- getNonReferencedFiles(dataDir, dataSource) - _ = filesToDelete.foreach(file => { - logger.info(s"Deleting file $file before upload to S3.") - try { - Files.deleteIfExists(file) - } catch { - case e: Exception => - logger.warn(s"Could not delete file $file before upload to S3: ${e.getMessage}") - } - }) - transferManager <- transferManagerBox.toFox ?~> "S3 upload is not properly configured, cannot get S3 client" + transferManager <- getS3TransferManager.toFox ?~> "S3 upload is not properly configured, cannot get S3 client" directoryUpload = transferManager.uploadDirectory( UploadDirectoryRequest.builder().bucket(bucketName).s3Prefix(prefix).source(dataDir).build() ) @@ -558,7 +566,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"Some files failed to upload to S3: $failedTransfers" } yield () - private def getNonReferencedFiles(dataDir: Path, dataSource: DataSource): Fox[List[Path]] = + private def findNonReferencedFiles(unpackedDir: Path, dataSource: DataSource): Fox[List[Path]] = for { usableDataSource <- dataSource.toUsable.toFox ?~> "Data source is not usable" explicitPaths: Set[Path] = usableDataSource.dataLayers @@ -566,15 +574,15 @@ class UploadService @Inject()(dataSourceService: DataSourceService, layer.mags.map(mag => mag.path match { case Some(_) => None - case None => Some(dataDir.resolve(List(layer.name, mag.mag.toMagLiteral(true)).mkString("/"))) + case None => Some(unpackedDir.resolve(List(layer.name, mag.mag.toMagLiteral(true)).mkString("/"))) })) .flatten .toSet neededPaths = usableDataSource.dataLayers .flatMap(layer => layer.allExplicitPaths.flatMap(_.toLocalPath)) - .map(dataDir.resolve) + .map(unpackedDir.resolve) .toSet ++ explicitPaths - allFiles <- PathUtils.listFilesRecursive(dataDir, silent = true, maxDepth = 10).toFox + allFiles <- PathUtils.listFilesRecursive(unpackedDir, silent = true, maxDepth = 10).toFox filesToDelete = allFiles.filterNot(file => neededPaths.exists(neededPath => file.startsWith(neededPath))) } yield filesToDelete @@ -624,13 +632,12 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- Fox.fromBool(list.forall(identity)) } yield () - private def unpackToDirFor(dataSourceId: DataSourceId, datasetNeedsConversion: Boolean): Path = { - val orgaDir = dataBaseDir.resolve(dataSourceId.organizationId) - if (datasetNeedsConversion) - orgaDir.resolve(forConversionDir).resolve(dataSourceId.directoryName) - else - orgaDir.resolve(uploadingDir).resolve(unpackedDir).resolve(dataSourceId.directoryName) - } + private def unpackToDirFor(dataSourceId: DataSourceId): Path = + dataBaseDir + .resolve(dataSourceId.organizationId) + .resolve(uploadingDir) + .resolve(unpackedDir) + .resolve(dataSourceId.directoryName) private def guessTypeOfUploadedDataSource(dataSourceDir: Path): UploadedDataSourceType.Value = if (looksLikeExploredDataSource(dataSourceDir).getOrElse(false)) { From 9e04d83fa9c3ea859f13d90703f79e6e458f2e1d Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 11:48:10 +0200 Subject: [PATCH 19/62] track new datasets also if added via add or uploadToPaths --- app/controllers/DatasetController.scala | 13 +++++++ .../WKRemoteDataStoreController.scala | 35 +++++-------------- app/models/dataset/DatasetService.scala | 34 +++++++++++++++++- .../services/uploading/UploadService.scala | 12 +++---- 4 files changed, 59 insertions(+), 35 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index b3d941c3649..e4e842bb40e 100755 --- 
a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -26,6 +26,7 @@ import models.dataset.explore.{ } import models.folder.FolderService import models.organization.OrganizationDAO +import models.storage.UsedStorageService import models.team.{TeamDAO, TeamService} import models.user.{User, UserDAO, UserService} import play.api.i18n.{Messages, MessagesProvider} @@ -137,6 +138,7 @@ class DatasetController @Inject()(userService: UserService, folderService: FolderService, thumbnailService: ThumbnailService, thumbnailCachingService: ThumbnailCachingService, + usedStorageService: UsedStorageService, conf: WkConf, authenticationService: AccessibleBySwitchingService, analyticsService: AnalyticsService, @@ -231,6 +233,11 @@ class DatasetController @Inject()(userService: UserService, request.body.folderId, user ) + _ <- datasetService.trackNewDataset(dataset, + user, + needsConversion = false, + datasetSizeBytes = 0, + viaAddRoute = false) } yield Ok(Json.obj("newDatasetId" -> dataset._id)) } @@ -649,6 +656,12 @@ class DatasetController @Inject()(userService: UserService, dataset.status == DataSourceStatus.notYetUploadedToPaths || dataset.status == DataSourceStatus.notYetUploaded) ?~> s"Dataset is not in uploading-to-paths status, got ${dataset.status}." _ <- Fox.fromBool(!dataset.isUsable) ?~> s"Dataset is already marked as usable." _ <- datasetDAO.updateDatasetStatusByDatasetId(datasetId, newStatus = "", isUsable = true) + _ <- usedStorageService.refreshStorageReportForDataset(dataset) + _ <- datasetService.trackNewDataset(dataset, + request.identity, + needsConversion = false, + datasetSizeBytes = 0, + viaAddRoute = false) } yield Ok } diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index d14de3c0f35..f4177d1b3a2 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -16,8 +16,6 @@ import com.scalableminds.webknossos.datastore.services.uploading.{ ReserveUploadInformation } import com.typesafe.scalalogging.LazyLogging -import mail.{MailchimpClient, MailchimpTag} -import models.analytics.{AnalyticsService, UploadDatasetEvent} import models.annotation.AnnotationDAO import models.dataset._ import models.dataset.credential.CredentialDAO @@ -26,13 +24,11 @@ import models.job.JobDAO import models.organization.OrganizationDAO import models.storage.UsedStorageService import models.team.TeamDAO -import models.user.{MultiUserDAO, User, UserDAO, UserService} +import models.user.{User, UserDAO, UserService} import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.Json import play.api.mvc.{Action, AnyContent, PlayBodyParsers} import security.{WebknossosBearerTokenAuthenticatorService, WkSilhouetteEnvironment} -import telemetry.SlackNotificationService -import utils.WkConf import scala.concurrent.duration.DurationInt import javax.inject.Inject @@ -42,7 +38,6 @@ class WKRemoteDataStoreController @Inject()( datasetService: DatasetService, dataStoreService: DataStoreService, dataStoreDAO: DataStoreDAO, - analyticsService: AnalyticsService, userService: UserService, organizationDAO: OrganizationDAO, usedStorageService: UsedStorageService, @@ -52,12 +47,8 @@ class WKRemoteDataStoreController @Inject()( folderDAO: FolderDAO, teamDAO: TeamDAO, jobDAO: JobDAO, - multiUserDAO: MultiUserDAO, credentialDAO: CredentialDAO, annotationDAO: AnnotationDAO, - mailchimpClient: MailchimpClient, - slackNotificationService: 
SlackNotificationService, - conf: WkConf, wkSilhouetteEnvironment: WkSilhouetteEnvironment)(implicit ec: ExecutionContext, bodyParsers: PlayBodyParsers) extends Controller with LazyLogging { @@ -144,31 +135,21 @@ class WKRemoteDataStoreController @Inject()( token: String, datasetId: ObjectId): Action[ReportDatasetUploadParameters] = Action.async(validateJson[ReportDatasetUploadParameters]) { implicit request => - dataStoreService.validateAccess(name, key) { dataStore => + dataStoreService.validateAccess(name, key) { _ => for { user <- bearerTokenService.userForToken(token) dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ?~> Messages("dataset.notFound", datasetId) ~> NOT_FOUND _ <- Fox.runIf(!request.body.needsConversion)(usedStorageService.refreshStorageReportForDataset(dataset)) - _ <- Fox.runIf(!request.body.needsConversion)(logUploadToSlack(user, dataset._id, viaAddRoute = false)) - _ = analyticsService.track(UploadDatasetEvent(user, dataset, dataStore, request.body.datasetSizeBytes)) - _ = if (!request.body.needsConversion) mailchimpClient.tagUser(user, MailchimpTag.HasUploadedOwnDataset) + _ <- datasetService.trackNewDataset(dataset, + user, + request.body.needsConversion, + request.body.datasetSizeBytes, + viaAddRoute = false) // TODO update dataset in db with layersToLink - } yield Ok(Json.obj("id" -> dataset._id)) + } yield Ok } } - // TODO do this for the new add codepath - private def logUploadToSlack(user: User, datasetId: ObjectId, viaAddRoute: Boolean): Fox[Unit] = - for { - organization <- organizationDAO.findOne(user._organization)(GlobalAccessContext) - multiUser <- multiUserDAO.findOne(user._multiUser)(GlobalAccessContext) - resultLink = s"${conf.Http.uri}/datasets/$datasetId" - addLabel = if (viaAddRoute) "(via explore+add)" else "(upload without conversion)" - superUserLabel = if (multiUser.isSuperUser) " (for superuser)" else "" - _ = slackNotificationService.info(s"Dataset added $addLabel$superUserLabel", - s"For organization: ${organization.name}. 
<$resultLink|Result>") - } yield () - def statusUpdate(name: String, key: String): Action[DataStoreStatus] = Action.async(validateJson[DataStoreStatus]) { implicit request => dataStoreService.validateAccess(name, key) { _ => diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index d0d1160ff73..cdb6d3b4830 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -21,12 +21,16 @@ import com.typesafe.scalalogging.LazyLogging import models.folder.FolderDAO import models.organization.{Organization, OrganizationDAO} import models.team._ -import models.user.{User, UserService} +import models.user.{MultiUserDAO, User, UserService} import com.scalableminds.webknossos.datastore.controllers.PathValidationResult +import mail.{MailchimpClient, MailchimpTag} +import models.analytics.{AnalyticsService, UploadDatasetEvent} import play.api.http.Status.NOT_FOUND import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{JsObject, Json} import security.RandomIDGenerator +import telemetry.SlackNotificationService +import utils.WkConf import javax.inject.Inject import scala.concurrent.duration._ @@ -40,10 +44,15 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, datasetMagsDAO: DatasetMagsDAO, teamDAO: TeamDAO, folderDAO: FolderDAO, + multiUserDAO: MultiUserDAO, + mailchimpClient: MailchimpClient, + analyticsService: AnalyticsService, + slackNotificationService: SlackNotificationService, dataStoreService: DataStoreService, teamService: TeamService, thumbnailCachingService: ThumbnailCachingService, userService: UserService, + conf: WkConf, rpc: RPC)(implicit ec: ExecutionContext) extends FoxImplicits with LazyLogging { @@ -546,6 +555,29 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, case None => datasetId.toString } + def trackNewDataset(dataset: Dataset, + user: User, + needsConversion: Boolean, + datasetSizeBytes: Long, + viaAddRoute: Boolean): Fox[Unit] = + for { + _ <- Fox.runIf(!needsConversion)(logDatasetUploadToSlack(user, dataset._id, viaAddRoute)) + dataStore <- dataStoreDAO.findOneByName(dataset._dataStore)(GlobalAccessContext) + _ = analyticsService.track(UploadDatasetEvent(user, dataset, dataStore, datasetSizeBytes)) + _ = if (!needsConversion) mailchimpClient.tagUser(user, MailchimpTag.HasUploadedOwnDataset) + } yield () + + private def logDatasetUploadToSlack(user: User, datasetId: ObjectId, viaAddRoute: Boolean): Fox[Unit] = + for { + organization <- organizationDAO.findOne(user._organization)(GlobalAccessContext) + multiUser <- multiUserDAO.findOne(user._multiUser)(GlobalAccessContext) + resultLink = s"${conf.Http.uri}/datasets/$datasetId" + addLabel = if (viaAddRoute) "(via explore+add)" else "(upload without conversion)" + superUserLabel = if (multiUser.isSuperUser) " (for superuser)" else "" + _ = slackNotificationService.info(s"Dataset added $addLabel$superUserLabel", + s"For organization: ${organization.name}. 
<$resultLink|Result>") + } yield () + def publicWrites(dataset: Dataset, requestingUserOpt: Option[User], organization: Option[Organization] = None, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index f64d17dcf5d..54624af070f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -359,9 +359,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, dataSourceId, needsConversion, label = s"unpacking to dataset to $unpackToDir") - postProcessingResult <- Fox - .runIf(!needsConversion)(exploreProcessUploadedDataSourceIfNeeded(needsConversion, unpackToDir, dataSourceId)) - .shiftBox + postProcessingResult <- exploreUploadedDataSourceIfNeeded(needsConversion, unpackToDir, dataSourceId).shiftBox _ <- cleanUpOnFailure(postProcessingResult, datasetId, dataSourceId, @@ -439,10 +437,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } yield Some(dataSourceWithAdaptedPaths) } - private def exploreProcessUploadedDataSourceIfNeeded(datasetNeedsConversion: Boolean, - unpackToDir: Path, - dataSourceId: DataSourceId): Fox[Unit] = - if (datasetNeedsConversion) + private def exploreUploadedDataSourceIfNeeded(needsConversion: Boolean, + unpackToDir: Path, + dataSourceId: DataSourceId): Fox[Unit] = + if (needsConversion) Fox.successful(()) else { for { From d09c5f10680105ddaebbff18e44da0f2745119d5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 12:00:36 +0200 Subject: [PATCH 20/62] wip legacy api version --- conf/webknossos.versioned.routes | 2 ++ .../scalableminds/util/mvc/ApiVersioning.scala | 2 +- .../datastore/controllers/LegacyController.scala | 3 +++ .../conf/datastore.versioned.routes | 16 ++++++++++++++++ .../conf/tracingstore.versioned.routes | 1 + 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/conf/webknossos.versioned.routes b/conf/webknossos.versioned.routes index 89dbe7bf49c..82ff7f7f57c 100644 --- a/conf/webknossos.versioned.routes +++ b/conf/webknossos.versioned.routes @@ -4,6 +4,7 @@ # Note: keep this in sync with the reported version numbers in the com.scalableminds.util.mvc.ApiVersioning trait # version log + # changed in v12: Dataset upload now expects layersToLink in new format with datasetId instead of orgaId+directoryName # changed in v11: Datasets reserveManualUpload flow via WK side. Note: older versions of the route are *not* supported for security reasons. # changed in v9: Datasets are now identified by their id, not their name. The routes now need to pass a dataset id instead of a name and organization id tuple. # Requests to the TracingStore and DatasStore need to address a dataset based on its directoryName and organization id. 
@@ -15,6 +16,7 @@ # new in v3: annotation info and finish request now take timestamp # new in v2: annotation json contains visibility enum instead of booleans +-> /v12/ webknossos.latest.Routes -> /v11/ webknossos.latest.Routes -> /v10/ webknossos.latest.Routes -> /v9/ webknossos.latest.Routes diff --git a/util/src/main/scala/com/scalableminds/util/mvc/ApiVersioning.scala b/util/src/main/scala/com/scalableminds/util/mvc/ApiVersioning.scala index 8b80052ae39..6ebe75036c1 100644 --- a/util/src/main/scala/com/scalableminds/util/mvc/ApiVersioning.scala +++ b/util/src/main/scala/com/scalableminds/util/mvc/ApiVersioning.scala @@ -5,7 +5,7 @@ import play.api.mvc.RequestHeader trait ApiVersioning { - private lazy val CURRENT_API_VERSION: Int = 11 + private lazy val CURRENT_API_VERSION: Int = 12 private lazy val OLDEST_SUPPORTED_API_VERSION: Int = 5 protected lazy val apiVersioningInfo: JsObject = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala index e2f2f59b27f..7b8a2f73944 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala @@ -48,6 +48,7 @@ case class LegacyLinkedLayerIdentifier(organizationId: Option[String], def getOrganizationId: String = this.organizationId.getOrElse(this.organizationName.getOrElse("")) } +// TODO use object LegacyLinkedLayerIdentifier { def apply(organizationId: String, dataSetName: String, @@ -74,6 +75,8 @@ class LegacyController @Inject()( override def allowRemoteOrigin: Boolean = true + def reserveUploadV11(): Action[LegacyReserveManualUploadInformation] = ??? // TODO + // To be called by people with disk access but not DatasetManager role. 
This way, they can upload a dataset manually on disk, // and it can be put in a webknossos folder where they have access def reserveManualUploadV10(): Action[LegacyReserveManualUploadInformation] = diff --git a/webknossos-datastore/conf/datastore.versioned.routes b/webknossos-datastore/conf/datastore.versioned.routes index ad27e58c16b..b3e3a6d9cd6 100644 --- a/webknossos-datastore/conf/datastore.versioned.routes +++ b/webknossos-datastore/conf/datastore.versioned.routes @@ -1,9 +1,15 @@ # Note: keep this in sync with the reported version numbers in the com.scalableminds.util.mvc.ApiVersioning trait +-> /v12/ datastore.latest.Routes + +POST /v11/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v11/ datastore.latest.Routes POST /v10/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v10/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v10/ datastore.latest.Routes # Read image data @@ -49,6 +55,8 @@ GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:data POST /v9/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v9/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v9/ datastore.latest.Routes # Read image data @@ -95,6 +103,8 @@ GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:data POST /v8/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v8/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v8/ datastore.latest.Routes # Read image data @@ -140,6 +150,8 @@ GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:data POST /v7/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v7/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v7/ datastore.latest.Routes # Read image data @@ -185,6 +197,8 @@ GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:data POST /v6/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v6/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v6/ datastore.latest.Routes # Read image data @@ -228,5 +242,7 @@ GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:data POST /v5/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v5/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() + -> /v5/ datastore.latest.Routes -> / datastore.latest.Routes diff --git a/webknossos-tracingstore/conf/tracingstore.versioned.routes b/webknossos-tracingstore/conf/tracingstore.versioned.routes index 695d6681b4a..734d31ed67c 100644 --- a/webknossos-tracingstore/conf/tracingstore.versioned.routes +++ b/webknossos-tracingstore/conf/tracingstore.versioned.routes @@ -1,5 +1,6 @@ # Note: keep this in sync with the reported version numbers in the com.scalableminds.util.mvc.ApiVersioning trait +-> /v12/ 
tracingstore.latest.Routes -> /v11/ tracingstore.latest.Routes -> /v10/ tracingstore.latest.Routes -> /v9/ tracingstore.latest.Routes From fa4a817299480705ca1412b853a1d1e59fdcc790 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 14:21:00 +0200 Subject: [PATCH 21/62] register dataset in db,cleanup,log to slack, validate linked layers --- app/controllers/DatasetController.scala | 4 +- .../WKRemoteDataStoreController.scala | 61 +++++++++---------- app/models/dataset/ComposeService.scala | 2 +- app/models/dataset/DatasetService.scala | 23 ++----- .../dataset/DatasetUploadToPathsService.scala | 28 +-------- app/models/dataset/LayerToLinkService.scala | 59 ++++++++++++++++++ conf/application.conf | 4 +- .../controllers/DataSourceController.scala | 4 +- .../services/DataSourceService.scala | 36 +++++------ .../services/uploading/UploadService.scala | 54 +++++++++------- .../DSSlackNotificationService.scala | 5 ++ 11 files changed, 157 insertions(+), 123 deletions(-) create mode 100644 app/models/dataset/LayerToLinkService.scala diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index e4e842bb40e..75e56cf222b 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -117,7 +117,7 @@ object SegmentAnythingMaskParameters { implicit val jsonFormat: Format[SegmentAnythingMaskParameters] = Json.format[SegmentAnythingMaskParameters] } -case class DataSourceRegistrationInfo(dataSource: UsableDataSource, folderId: Option[String], dataStoreName: String) +case class DataSourceRegistrationInfo(dataSource: UsableDataSource, folderId: Option[ObjectId], dataStoreName: String) object DataSourceRegistrationInfo { implicit val jsonFormat: OFormat[DataSourceRegistrationInfo] = Json.format[DataSourceRegistrationInfo] @@ -209,7 +209,7 @@ class DatasetController @Inject()(userService: UserService, request.body.datasetName, dataStore, dataSource, - folderIdOpt.map(_.toString), + folderIdOpt, request.identity ) ?~> "dataset.explore.autoAdd.failed" } yield Ok(Json.toJson(newDataset._id)) diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index f4177d1b3a2..de203bcf02a 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -1,16 +1,20 @@ package controllers -import com.scalableminds.util.accesscontext.{AuthorizedAccessContext, GlobalAccessContext} +import com.scalableminds.util.accesscontext.{AuthorizedAccessContext, DBAccessContext, GlobalAccessContext} import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, Full} import com.scalableminds.webknossos.datastore.controllers.JobExportProperties import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload -import com.scalableminds.webknossos.datastore.models.datasource.DataSource +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataSource, + DataSourceId, + DataSourceStatus, + UnusableDataSource +} import com.scalableminds.webknossos.datastore.services.{DataSourcePathInfo, DataStoreStatus} import com.scalableminds.webknossos.datastore.services.uploading.{ - LinkedLayerIdentifier, ReportDatasetUploadParameters, ReserveAdditionalInformation, ReserveUploadInformation @@ -19,13 +23,12 @@ import com.typesafe.scalalogging.LazyLogging import 
models.annotation.AnnotationDAO import models.dataset._ import models.dataset.credential.CredentialDAO -import models.folder.FolderDAO import models.job.JobDAO import models.organization.OrganizationDAO import models.storage.UsedStorageService import models.team.TeamDAO -import models.user.{User, UserDAO, UserService} -import play.api.i18n.{Messages, MessagesProvider} +import models.user.UserDAO +import play.api.i18n.Messages import play.api.libs.json.Json import play.api.mvc.{Action, AnyContent, PlayBodyParsers} import security.{WebknossosBearerTokenAuthenticatorService, WkSilhouetteEnvironment} @@ -38,13 +41,12 @@ class WKRemoteDataStoreController @Inject()( datasetService: DatasetService, dataStoreService: DataStoreService, dataStoreDAO: DataStoreDAO, - userService: UserService, organizationDAO: OrganizationDAO, usedStorageService: UsedStorageService, + layerToLinkService: LayerToLinkService, datasetDAO: DatasetDAO, datasetLayerDAO: DatasetLayerDAO, userDAO: UserDAO, - folderDAO: FolderDAO, teamDAO: TeamDAO, jobDAO: JobDAO, credentialDAO: CredentialDAO, @@ -71,21 +73,17 @@ class WKRemoteDataStoreController @Inject()( _ <- Fox.fromBool(organization._id == user._organization) ?~> "notAllowed" ~> FORBIDDEN _ <- datasetService.assertValidDatasetName(uploadInfo.name) _ <- Fox.fromBool(dataStore.onlyAllowedOrganization.forall(_ == organization._id)) ?~> "dataset.upload.Datastore.restricted" - folderId = uploadInfo.folderId.getOrElse(organization._rootFolder) - _ <- folderDAO.assertUpdateAccess(folderId)(AuthorizedAccessContext(user)) ?~> "folder.noWriteAccess" - _ <- Fox.serialCombined(uploadInfo.layersToLink.getOrElse(List.empty))(l => validateLayerToLink(l, user)) ?~> "dataset.upload.invalidLinkedLayers" - newDatasetId = ObjectId.generate + _ <- Fox.serialCombined(uploadInfo.layersToLink.getOrElse(List.empty))(l => + layerToLinkService.validateLayerToLink(l, user)) ?~> "dataset.upload.invalidLinkedLayers" _ <- Fox.runIf(request.body.requireUniqueName.getOrElse(false))( datasetService.assertNewDatasetNameUnique(request.body.name, organization._id)) - dataset <- datasetService.createPreliminaryDataset(newDatasetId, - uploadInfo.name, - datasetService.generateDirectoryName(uploadInfo.name, - newDatasetId), - uploadInfo.organization, - dataStore) ?~> "dataset.upload.creation.failed" - _ <- datasetDAO.updateFolder(dataset._id, folderId)(GlobalAccessContext) + preliminaryDataSource = UnusableDataSource(DataSourceId("", ""), None, DataSourceStatus.notYetUploaded) + dataset <- datasetService.createVirtualDataset(uploadInfo.name, + dataStore, + preliminaryDataSource, + uploadInfo.folderId, + user) ?~> "dataset.upload.creation.failed" _ <- datasetService.addInitialTeams(dataset, uploadInfo.initialTeams, user)(AuthorizedAccessContext(user)) - _ <- datasetService.addUploader(dataset, user._id)(AuthorizedAccessContext(user)) additionalInfo = ReserveAdditionalInformation(dataset._id, dataset.directoryName) } yield Ok(Json.toJson(additionalInfo)) } @@ -120,16 +118,6 @@ class WKRemoteDataStoreController @Inject()( } } - private def validateLayerToLink(layerIdentifier: LinkedLayerIdentifier, - requestingUser: User)(implicit ec: ExecutionContext, m: MessagesProvider): Fox[Unit] = - for { - dataset <- datasetDAO.findOne(layerIdentifier.datasetId)(AuthorizedAccessContext(requestingUser)) ?~> Messages( - "dataset.notFound", - layerIdentifier.datasetId) ~> NOT_FOUND - isTeamManagerOrAdmin <- userService.isTeamManagerOrAdminOfOrg(requestingUser, dataset._organization) - _ <- Fox.fromBool(isTeamManagerOrAdmin || 
requestingUser.isDatasetManager || dataset.isPublic) ?~> "dataset.upload.linkRestricted" - } yield () - def reportDatasetUpload(name: String, key: String, token: String, @@ -145,7 +133,18 @@ class WKRemoteDataStoreController @Inject()( request.body.needsConversion, request.body.datasetSizeBytes, viaAddRoute = false) - // TODO update dataset in db with layersToLink + dataSourceWithLinkedLayersOpt <- Fox.runOptional(request.body.dataSourceOpt) { + implicit val ctx: DBAccessContext = AuthorizedAccessContext(user) + layerToLinkService.addLayersToLinkToDataSource(_, request.body.layersToLink) + } + _ <- Fox.runOptional(dataSourceWithLinkedLayersOpt) { dataSource => + logger.info(s"Updating dataset $datasetId in database after upload reported from datastore $name.") + datasetDAO.updateDataSource(datasetId, + dataset._dataStore, + dataSource.hashCode(), + dataSource, + isUsable = true)(GlobalAccessContext) + } } yield Ok } } diff --git a/app/models/dataset/ComposeService.scala b/app/models/dataset/ComposeService.scala index f30ac84d1bd..1a3f2cff017 100644 --- a/app/models/dataset/ComposeService.scala +++ b/app/models/dataset/ComposeService.scala @@ -48,7 +48,7 @@ class ComposeService @Inject()(datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDA dataset <- datasetService.createVirtualDataset(composeRequest.newDatasetName, dataStore, dataSource, - Some(composeRequest.targetFolderId.toString), + Some(composeRequest.targetFolderId), user) } yield (dataSource, dataset._id) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index cdb6d3b4830..fd8f3bb3e35 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -85,36 +85,25 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, _ <- Fox.fromBool(!isDatasetNameAlreadyTaken) ?~> "dataset.name.alreadyTaken" } yield () - // TODO consolidate with createVirtualDataset? 
- def createPreliminaryDataset(newDatasetId: ObjectId, - datasetName: String, - datasetDirectoryName: String, - organizationId: String, - dataStore: DataStore): Fox[Dataset] = { - val unusableDataSource = - UnusableDataSource(DataSourceId(datasetDirectoryName, organizationId), None, DataSourceStatus.notYetUploaded) - createDataset(dataStore, newDatasetId, datasetName, unusableDataSource, isVirtual = true) - } - def createVirtualDataset(datasetName: String, dataStore: DataStore, - dataSource: UsableDataSource, - folderId: Option[String], + dataSource: DataSource, + folderId: Option[ObjectId], user: User): Fox[Dataset] = for { _ <- assertValidDatasetName(datasetName) organization <- organizationDAO.findOne(user._organization)(GlobalAccessContext) ?~> "organization.notFound" - folderId <- ObjectId.fromString(folderId.getOrElse(organization._rootFolder.toString)) ?~> "dataset.upload.folderId.invalid" - _ <- folderDAO.assertUpdateAccess(folderId)(AuthorizedAccessContext(user)) ?~> "folder.noWriteAccess" + folderIdWithFallback = folderId.getOrElse(organization._rootFolder) + _ <- folderDAO.assertUpdateAccess(folderIdWithFallback)(AuthorizedAccessContext(user)) ?~> "folder.noWriteAccess" newDatasetId = ObjectId.generate directoryName = generateDirectoryName(datasetName, newDatasetId) dataset <- createDataset(dataStore, newDatasetId, datasetName, - dataSource.copy(id = DataSourceId(directoryName, organization._id)), + dataSource.withUpdatedId(DataSourceId(directoryName, organization._id)), isVirtual = true) datasetId = dataset._id - _ <- datasetDAO.updateFolder(datasetId, folderId)(GlobalAccessContext) + _ <- datasetDAO.updateFolder(datasetId, folderIdWithFallback)(GlobalAccessContext) _ <- addUploader(dataset, user._id)(GlobalAccessContext) } yield dataset diff --git a/app/models/dataset/DatasetUploadToPathsService.scala b/app/models/dataset/DatasetUploadToPathsService.scala index 9f8dc3f677a..27a01c39d1f 100644 --- a/app/models/dataset/DatasetUploadToPathsService.scala +++ b/app/models/dataset/DatasetUploadToPathsService.scala @@ -38,6 +38,7 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService, organizationDAO: OrganizationDAO, datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDAO, + layerToLinkService: LayerToLinkService, datasetLayerAttachmentsDAO: DatasetLayerAttachmentsDAO, conf: WkConf) extends FoxImplicits @@ -63,7 +64,7 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService, dataSourceWithPaths <- addPathsToDatasource(dataSourceWithNewDirectoryName, organization._id, parameters.pathPrefix) - dataSourceWithLayersToLink <- addLayersToLink(dataSourceWithPaths, parameters.layersToLink) + dataSourceWithLayersToLink <- layerToLinkService.addLayersToLinkToDataSource(dataSourceWithPaths, parameters.layersToLink) _ <- assertValidDataSource(dataSourceWithLayersToLink).toFox dataStore <- findReferencedDataStore(parameters.layersToLink) dataset <- datasetService.createDataset( @@ -209,32 +210,7 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService, layerPath / defaultDirName / (safeAttachmentName + suffix) } - private def addLayersToLink(dataSource: UsableDataSource, layersToLink: Seq[LinkedLayerIdentifier])( - implicit ctx: DBAccessContext, - mp: MessagesProvider, - ec: ExecutionContext): Fox[UsableDataSource] = - for { - linkedLayers <- Fox.serialCombined(layersToLink)(resolveLayerToLink) ?~> "dataset.layerToLink.failed" - allLayers = linkedLayers ++ dataSource.dataLayers - _ <- Fox.fromBool(allLayers.length == 
allLayers.map(_.name).distinct.length) ?~> "dataset.duplicateLayerNames" - } yield dataSource.copy(dataLayers = allLayers) - private def resolveLayerToLink(layerToLink: LinkedLayerIdentifier)(implicit ctx: DBAccessContext, - ec: ExecutionContext, - mp: MessagesProvider): Fox[StaticLayer] = - for { - dataset <- datasetDAO.findOne(layerToLink.datasetId) ?~> "dataset.notFound" - usableDataSource <- datasetService.usableDataSourceFor(dataset) - layer: StaticLayer <- usableDataSource.dataLayers - .find(_.name == layerToLink.layerName) - .toFox ?~> "dataset.layerToLink.layerNotFound" - newName = layerToLink.newLayerName.getOrElse(layer.name) - layerRenamed: StaticLayer <- layer match { - case l: StaticColorLayer => Fox.successful(l.copy(name = newName)) - case l: StaticSegmentationLayer => Fox.successful(l.copy(name = newName)) - case _ => Fox.failure("Unknown layer type for link") - } - } yield layerRenamed def reserveAttachmentUploadToPath(dataset: Dataset, parameters: ReserveAttachmentUploadToPathRequest)( implicit ec: ExecutionContext, diff --git a/app/models/dataset/LayerToLinkService.scala b/app/models/dataset/LayerToLinkService.scala new file mode 100644 index 00000000000..dfa8388352d --- /dev/null +++ b/app/models/dataset/LayerToLinkService.scala @@ -0,0 +1,59 @@ +package models.dataset + +import com.scalableminds.util.accesscontext.{AuthorizedAccessContext, DBAccessContext} +import com.scalableminds.util.tools.{Fox, FoxImplicits} +import com.scalableminds.webknossos.datastore.models.datasource.{ + StaticColorLayer, + StaticLayer, + StaticSegmentationLayer, + UsableDataSource +} +import com.scalableminds.webknossos.datastore.services.uploading.LinkedLayerIdentifier +import models.user.{User, UserService} +import play.api.http.Status.NOT_FOUND +import play.api.i18n.{Messages, MessagesProvider} + +import javax.inject.Inject +import scala.concurrent.ExecutionContext + +class LayerToLinkService @Inject()(datasetDAO: DatasetDAO, userService: UserService, datasetService: DatasetService) + extends FoxImplicits { + + def validateLayerToLink(layerIdentifier: LinkedLayerIdentifier, + requestingUser: User)(implicit ec: ExecutionContext, m: MessagesProvider): Fox[Unit] = + for { + dataset <- datasetDAO.findOne(layerIdentifier.datasetId)(AuthorizedAccessContext(requestingUser)) ?~> Messages( + "dataset.notFound", + layerIdentifier.datasetId) ~> NOT_FOUND + isTeamManagerOrAdmin <- userService.isTeamManagerOrAdminOfOrg(requestingUser, dataset._organization) + _ <- Fox.fromBool(isTeamManagerOrAdmin || requestingUser.isDatasetManager || dataset.isPublic) ?~> "dataset.upload.linkRestricted" + } yield () + + def addLayersToLinkToDataSource(dataSource: UsableDataSource, layersToLink: Seq[LinkedLayerIdentifier])( + implicit ctx: DBAccessContext, + mp: MessagesProvider, + ec: ExecutionContext): Fox[UsableDataSource] = + for { + linkedLayers <- Fox.serialCombined(layersToLink)(resolveLayerToLink) ?~> "dataset.layerToLink.failed" + allLayers = linkedLayers ++ dataSource.dataLayers + _ <- Fox.fromBool(allLayers.length == allLayers.map(_.name).distinct.length) ?~> "dataset.duplicateLayerNames" + } yield dataSource.copy(dataLayers = allLayers) + + private def resolveLayerToLink(layerToLink: LinkedLayerIdentifier)(implicit ctx: DBAccessContext, + ec: ExecutionContext, + mp: MessagesProvider): Fox[StaticLayer] = + for { + dataset <- datasetDAO.findOne(layerToLink.datasetId) ?~> "dataset.notFound" + usableDataSource <- datasetService.usableDataSourceFor(dataset) + layer: StaticLayer <- usableDataSource.dataLayers + 
.find(_.name == layerToLink.layerName) + .toFox ?~> "dataset.layerToLink.layerNotFound" + newName = layerToLink.newLayerName.getOrElse(layer.name) + layerRenamed: StaticLayer <- layer match { + case l: StaticColorLayer => Fox.successful(l.copy(name = newName)) + case l: StaticSegmentationLayer => Fox.successful(l.copy(name = newName)) + case _ => Fox.failure("Unknown layer type for link") + } + } yield layerRenamed + +} diff --git a/conf/application.conf b/conf/application.conf index 31b561ad56b..18d9e5b02fd 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -156,8 +156,8 @@ features { taskReopenAllowedInSeconds = 30 allowDeleteDatasets = true # to enable jobs for local development, use "yarn enable-jobs" to also activate it in the database - jobsEnabled = false - voxelyticsEnabled = false + jobsEnabled = true + voxelyticsEnabled = true neuronInferralCostPerGVx = 1 mitochondriaInferralCostPerGVx = 0.5 alignmentCostPerGVx = 0.5 diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index e2eb7cfa16a..278c7b238f9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -33,6 +33,7 @@ import com.scalableminds.webknossos.datastore.services.connectome.{ SynapticPartnerDirection } import com.scalableminds.webknossos.datastore.services.mapping.AgglomerateService +import com.scalableminds.webknossos.datastore.slacknotification.DSSlackNotificationService import play.api.data.Form import play.api.data.Forms.{longNumber, nonEmptyText, number, tuple} import play.api.libs.Files @@ -63,6 +64,7 @@ class DataSourceController @Inject()( segmentIndexFileService: SegmentIndexFileService, agglomerateService: AgglomerateService, storageUsageService: DSUsedStorageService, + slackNotificationService: DSSlackNotificationService, datasetErrorLoggingService: DSDatasetErrorLoggingService, exploreRemoteLayerService: ExploreRemoteLayerService, uploadService: UploadService, @@ -183,7 +185,7 @@ class DataSourceController @Inject()( } def finishUpload(): Action[UploadInformation] = Action.async(validateJson[UploadInformation]) { implicit request => - log() { + log(Some(slackNotificationService.noticeFailedFinishUpload)) { for { datasetId <- uploadService.getDatasetIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index efef9384d27..c95336f852b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -250,27 +250,23 @@ class DataSourceService @Inject()( } } - def resolvePathsInNewBasePath(dataSource: DataSource, newBasePath: UPath): Fox[DataSource] = - dataSource.toUsable match { - case Some(usableDataSource) => - val updatedDataLayers = usableDataSource.dataLayers.map { layer => - layer.mapped( - magMapping = mag => - mag.path match { - case Some(existingMagPath) => mag.copy(path = 
Some(existingMagPath.resolvedIn(newBasePath))) - // If the mag does not have a path, it is an implicit path, we need to make it explicit. - case _ => - mag.copy( - path = Some(newBasePath / layer.name / mag.mag.toMagLiteral(true)) - ) - }, - attachmentMapping = _.resolvedIn(newBasePath) - ) - } - Fox.successful(usableDataSource.copy(dataLayers = updatedDataLayers)) - case None => - Fox.failure("Cannot replace paths of unusable datasource") + def resolvePathsInNewBasePath(dataSource: UsableDataSource, newBasePath: UPath): UsableDataSource = { + val updatedDataLayers = dataSource.dataLayers.map { layer => + layer.mapped( + magMapping = mag => + mag.path match { + case Some(existingMagPath) => mag.copy(path = Some(existingMagPath.resolvedIn(newBasePath))) + // If the mag does not have a path, it is an implicit path, we need to make it explicit. + case _ => + mag.copy( + path = Some(newBasePath / layer.name / mag.mag.toMagLiteral(true)) + ) + }, + attachmentMapping = _.resolvedIn(newBasePath) + ) } + dataSource.copy(dataLayers = updatedDataLayers) + } private def resolveAttachmentsAndAddScanned(dataSourcePath: Path, dataSource: UsableDataSource) = dataSource.dataLayers.map(dataLayer => { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 54624af070f..8b666a17340 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -69,7 +69,7 @@ object ReserveAdditionalInformation { case class ReportDatasetUploadParameters( needsConversion: Boolean, datasetSizeBytes: Long, - dataSourceOpt: Option[DataSource], // must be set if needsConversion is false + dataSourceOpt: Option[UsableDataSource], // must be set if needsConversion is false layersToLink: Seq[LinkedLayerIdentifier] ) object ReportDatasetUploadParameters { @@ -303,7 +303,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, runningUploadMetadataStore.removeFromSet(redisKeyForFileChunkSet(uploadId, filePath), String.valueOf(currentChunkNumber)) val errorMsg = - s"Error receiving chunk $currentChunkNumber for uploadId $uploadId (datsetId $datasetId): ${e.getMessage}" + s"Error receiving chunk $currentChunkNumber for uploadId $uploadId (datasetId $datasetId): ${e.getMessage}" logger.warn(errorMsg) Fox.failure(errorMsg) } @@ -344,7 +344,11 @@ class UploadService @Inject()(dataSourceService: DataSourceService, for { dataSourceId <- getDataSourceIdByUploadId(uploadId) datasetId <- getDatasetIdByUploadId(uploadId) + linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) _ = logger.info(s"Finishing dataset upload $uploadId of datasetId $datasetId ($dataSourceId)...") + _ <- Fox.fromBool( + !uploadInformation.needsConversion.getOrElse(false) || !linkedLayerIdentifiers.layersToLink + .exists(_.nonEmpty)) ?~> "Cannot use linked layers if the dataset needs conversion" needsConversion = uploadInformation.needsConversion.getOrElse(false) uploadDir = uploadDirectoryFor(dataSourceId.organizationId, uploadId) _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) @@ -367,7 +371,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, label = s"processing dataset at $unpackToDir") datasetSizeBytes <- 
tryo(FileUtils.sizeOfDirectoryAsBigInteger(new File(unpackToDir.toString)).longValue).toFox dataSourceWithAbsolutePathsOpt <- moveUnpackedToTarget(unpackToDir, needsConversion, datasetId, dataSourceId) - linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) + _ <- remoteWebknossosClient.reportUpload( datasetId, ReportDatasetUploadParameters( @@ -380,7 +384,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } yield datasetId } - private def deleteFilesNotReferencedInDataSource(unpackedDir: Path, dataSource: DataSource): Fox[Unit] = + private def deleteFilesNotReferencedInDataSource(unpackedDir: Path, dataSource: UsableDataSource): Fox[Unit] = for { filesToDelete <- findNonReferencedFiles(unpackedDir, dataSource) _ = if (filesToDelete.nonEmpty) @@ -399,7 +403,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def moveUnpackedToTarget(unpackedDir: Path, needsConversion: Boolean, datasetId: ObjectId, - dataSourceId: DataSourceId): Fox[Option[DataSource]] = + dataSourceId: DataSourceId): Fox[Option[UsableDataSource]] = if (needsConversion) { val forConversionPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) @@ -410,7 +414,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, for { dataSourceFromDir <- Fox.successful( dataSourceService.dataSourceFromDir(unpackedDir, dataSourceId.organizationId)) - _ <- deleteFilesNotReferencedInDataSource(unpackedDir, dataSourceFromDir) + usableDataSourceFromDir <- dataSourceFromDir.toUsable.toFox ?~> s"Invalid dataset uploaded: ${dataSourceFromDir.statusOpt + .getOrElse("")}" + _ <- deleteFilesNotReferencedInDataSource(unpackedDir, usableDataSourceFromDir) newBasePath <- if (dataStoreConfig.Datastore.S3Upload.enabled) { for { s3UploadBucket <- s3UploadBucketOpt.toFox @@ -430,7 +436,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- tryo(FileUtils.moveDirectory(unpackedDir.toFile, finalUploadedLocalPath.toFile)).toFox } yield UPath.fromLocalPath(finalUploadedLocalPath) } - dataSourceWithAdaptedPaths <- dataSourceService.resolvePathsInNewBasePath(dataSourceFromDir, newBasePath) + dataSourceWithAdaptedPaths = dataSourceService.resolvePathsInNewBasePath(usableDataSourceFromDir, newBasePath) _ = this.synchronized { PathUtils.deleteDirectoryRecursively(unpackedDir) } @@ -564,25 +570,27 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s"Some files failed to upload to S3: $failedTransfers" } yield () - private def findNonReferencedFiles(unpackedDir: Path, dataSource: DataSource): Fox[List[Path]] = + private def findNonReferencedFiles(unpackedDir: Path, dataSource: UsableDataSource): Fox[List[Path]] = { + val explicitPaths: Set[Path] = dataSource.dataLayers + .flatMap(layer => layer.allExplicitPaths.flatMap(_.toLocalPath)) + .map(unpackedDir.resolve) + .toSet + val additionalMagPaths: Set[Path] = dataSource.dataLayers + .flatMap(layer => + layer.mags.map(mag => + mag.path match { + case Some(_) => None + case None => Some(unpackedDir.resolve(List(layer.name, mag.mag.toMagLiteral(true)).mkString("/"))) + })) + .flatten + .toSet + + val allReferencedPaths = explicitPaths ++ additionalMagPaths for { - usableDataSource <- dataSource.toUsable.toFox ?~> "Data source is not usable" - explicitPaths: Set[Path] = usableDataSource.dataLayers - .flatMap(layer => - layer.mags.map(mag => - mag.path match { - case Some(_) => None - case None 
=> Some(unpackedDir.resolve(List(layer.name, mag.mag.toMagLiteral(true)).mkString("/"))) - })) - .flatten - .toSet - neededPaths = usableDataSource.dataLayers - .flatMap(layer => layer.allExplicitPaths.flatMap(_.toLocalPath)) - .map(unpackedDir.resolve) - .toSet ++ explicitPaths allFiles <- PathUtils.listFilesRecursive(unpackedDir, silent = true, maxDepth = 10).toFox - filesToDelete = allFiles.filterNot(file => neededPaths.exists(neededPath => file.startsWith(neededPath))) + filesToDelete = allFiles.filterNot(file => allReferencedPaths.exists(neededPath => file.startsWith(neededPath))) } yield filesToDelete + } private def cleanUpOnFailure[T](result: Box[T], datasetId: ObjectId, diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/slacknotification/DSSlackNotificationService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/slacknotification/DSSlackNotificationService.scala index f947e0f2d43..d8096f633fb 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/slacknotification/DSSlackNotificationService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/slacknotification/DSSlackNotificationService.scala @@ -23,4 +23,9 @@ class DSSlackNotificationService @Inject()(rpc: RPC, config: DataStoreConfig) ex msg = e.getMessage ) + def noticeFailedFinishUpload(msg: String): Unit = + slackClient.warn( + title = "Failed finishUpload request", + msg = msg + ) } From a50f98a3c75535e69f413eaf86a29c069a3de7e6 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 14:24:23 +0200 Subject: [PATCH 22/62] rename LegacyController to DSLegacyApiController --- ...ller.scala => DSLegacyApiController.scala} | 2 +- .../conf/datastore.versioned.routes | 352 +++++++++--------- 2 files changed, 177 insertions(+), 177 deletions(-) rename webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/{LegacyController.scala => DSLegacyApiController.scala} (99%) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala similarity index 99% rename from webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala rename to webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala index 7b8a2f73944..e69617747d5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/LegacyController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala @@ -58,7 +58,7 @@ object LegacyLinkedLayerIdentifier { implicit val jsonFormat: OFormat[LegacyLinkedLayerIdentifier] = Json.format[LegacyLinkedLayerIdentifier] } -class LegacyController @Inject()( +class DSLegacyApiController @Inject()( accessTokenService: DataStoreAccessTokenService, remoteWebknossosClient: DSRemoteWebknossosClient, binaryDataController: BinaryDataController, diff --git a/webknossos-datastore/conf/datastore.versioned.routes b/webknossos-datastore/conf/datastore.versioned.routes index b3e3a6d9cd6..5f138d18fc7 100644 --- a/webknossos-datastore/conf/datastore.versioned.routes +++ b/webknossos-datastore/conf/datastore.versioned.routes @@ -2,247 +2,247 @@ -> /v12/ datastore.latest.Routes -POST /v11/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST 
/v11/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v11/ datastore.latest.Routes -POST /v10/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v10/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() -POST /v10/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST /v10/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v10/ datastore.latest.Routes # Read image data -POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.LegacyController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.LegacyController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.LegacyController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) +GET 
/v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) +GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) +GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) -POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.LegacyController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) +POST /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) -POST /v9/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) +POST /v9/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) # Zarr2 compatible routes -GET /v9/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/ 
@com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") -GET /v9/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v9/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") +GET 
/v9/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v9/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) # Zarr3 compatible routes -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET 
/v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: 
String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v9/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) -POST /v9/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v9/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() -POST /v9/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST /v9/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v9/ datastore.latest.Routes # Read image data -POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.LegacyController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.LegacyController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.LegacyController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data 
@com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) -POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.LegacyController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) +POST /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) -POST 
/v8/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) +POST /v8/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) # Zarr2 compatible routes -GET /v8/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") -GET /v8/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, 
coordinates: String) +GET /v8/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") +GET /v8/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v8/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) # Zarr3 compatible routes -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/ 
@com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET 
/v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v8/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) -POST /v8/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v8/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() -POST /v8/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST /v8/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v8/ datastore.latest.Routes # Read image data -POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.LegacyController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: 
Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.LegacyController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.LegacyController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) -POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName 
@com.scalableminds.webknossos.datastore.controllers.LegacyController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) +POST /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) -POST /v7/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) +POST /v7/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) # Zarr2 compatible routes -GET /v7/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") -GET /v7/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ 
@com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v7/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") +GET /v7/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZArrayV9(organizationId: String, 
datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v7/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) # Zarr3 compatible routes -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/ 
@com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v7/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) -POST /v7/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v7/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() -POST /v7/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST /v7/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v7/ datastore.latest.Routes # Read image data -POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: 
String, dataLayerName: String) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.LegacyController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.LegacyController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.LegacyController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) +GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) +GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) +GET 
/v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) -POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.LegacyController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) +POST /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) -POST /v6/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) +POST /v6/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) # Zarr2 compatible routes -GET /v6/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") -GET /v6/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZAttrsV9(organizationId: 
String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v6/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") +GET /v6/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag 
@com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v6/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) # Zarr3 compatible routes -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: 
String, mag: String) -GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v6/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) -POST /v6/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() +POST /v6/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() -POST /v6/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +POST 
/v6/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v6/ datastore.latest.Routes # Read image data -POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.LegacyController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.LegacyController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.LegacyController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) -POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.LegacyController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) - -POST /v5/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) +POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaWebknossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/readData 
@com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidPostV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/data @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawCuboidV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, depth: Int, mag: String, halfByte: Boolean ?= false, mappingName: Option[String]) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/thumbnail.jpg @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.thumbnailJpegV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, x: Int, y: Int, z: Int, width: Int, height: Int, mag: String, mappingName: Option[String], intensityMin: Option[Double], intensityMax: Option[Double], color: Option[String], invertColor: Option[Boolean]) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/findData @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.findDataV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/histogram @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.histogramV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mag:mag/x:x/y:y/z:z/bucket.raw @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestViaKnossosV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: Int, x: Int, y: Int, z: Int, cubeSize: Int) +POST /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/adHocMesh @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestAdHocMeshV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/datasets/:organizationId/:datasetDirectoryName/layers/:dataLayerName/mappings/:mappingName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.mappingJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mappingName: String) + +POST /v5/triggers/reload/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reloadDatasourceV9(organizationId: String, datasetDirectoryName: String, layerName: Option[String]) # Zarr2 compatible routes -GET /v5/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") -GET /v5/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, 
datasetDirectoryName: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) +GET /v5/zarr/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName="") +GET /v5/zarr/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/ 
@com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zattrs @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZAttrsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/.zgroup @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZGroupV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 2) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/.zarray @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZArrayV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v5/zarr/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) # Zarr3 compatible routes -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag 
@com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) -GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.LegacyController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) - -POST /v5/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveManualUploadV10() - -POST /v5/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.LegacyController.reserveUploadV11() +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/datasource-properties.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataSourceV9(organizationId: String, datasetDirectoryName: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/ @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestDataLayerMagDirectoryContentsV9(organizationId: String, datasetDirectoryName: String, dataLayerName: 
String, mag: String, zarrVersion: Int = 3) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/zarr.json @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestZarrJsonForMagV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String) +GET /v5/zarr3_experimental/:organizationId/:datasetDirectoryName/:dataLayerName/:mag/:coordinates @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.requestRawZarrCubeV9(organizationId: String, datasetDirectoryName: String, dataLayerName: String, mag: String, coordinates: String) + +POST /v5/datasets/reserveManualUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveManualUploadV10() + +POST /v5/datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DSLegacyApiController.reserveUploadV11() -> /v5/ datastore.latest.Routes -> / datastore.latest.Routes From b79c5a31dec5ad730e14d997bfbe094ad0169307 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 14:35:56 +0200 Subject: [PATCH 23/62] fix legacy api adapter --- .../controllers/DSLegacyApiController.scala | 50 +++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala index e69617747d5..dd7f5a2efef 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala @@ -12,7 +12,7 @@ import com.scalableminds.webknossos.datastore.models.{ } import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, UnusableDataSource, UsableDataSource} import com.scalableminds.webknossos.datastore.services.mesh.FullMeshRequest -import com.scalableminds.webknossos.datastore.services.uploading.ReserveUploadInformation +import com.scalableminds.webknossos.datastore.services.uploading.{LinkedLayerIdentifier, ReserveUploadInformation} import com.scalableminds.webknossos.datastore.services.{ DSRemoteWebknossosClient, DataSourceService, @@ -37,6 +37,22 @@ object LegacyReserveManualUploadInformation { Json.format[LegacyReserveManualUploadInformation] } +case class LegacyReserveUploadInformation( + uploadId: String, // upload id that was also used in chunk upload (this time without file paths) + name: String, // dataset name + organization: String, + totalFileCount: Long, + filePaths: Option[List[String]], + totalFileSizeInBytes: Option[Long], + layersToLink: Option[List[LegacyLinkedLayerIdentifier]], + initialTeams: List[ObjectId], // team ids + folderId: Option[ObjectId], + requireUniqueName: Option[Boolean] +) +object LegacyReserveUploadInformation { + implicit val jsonFormat: OFormat[LegacyReserveUploadInformation] = Json.format[LegacyReserveUploadInformation] +} + case class LegacyLinkedLayerIdentifier(organizationId: Option[String], organizationName: Option[String], // Filled by backend after identifying the dataset by name. Afterwards this updated value is stored in the redis database. 
@@ -48,7 +64,6 @@ case class LegacyLinkedLayerIdentifier(organizationId: Option[String],
   def getOrganizationId: String = this.organizationId.getOrElse(this.organizationName.getOrElse(""))
 }
 
-// TODO use
 object LegacyLinkedLayerIdentifier {
   def apply(organizationId: String,
             dataSetName: String,
@@ -75,7 +90,36 @@ class DSLegacyApiController @Inject()(
 
   override def allowRemoteOrigin: Boolean = true
 
-  def reserveUploadV11(): Action[LegacyReserveManualUploadInformation] = ??? // TODO
+  def reserveUploadV11(): Action[LegacyReserveUploadInformation] =
+    Action.async(validateJson[LegacyReserveUploadInformation]) { implicit request =>
+      accessTokenService.validateAccessFromTokenContext(
+        UserAccessRequest.administrateDataSources(request.body.organization)) {
+
+        for {
+          adaptedLayersToLink <- Fox.serialCombined(request.body.layersToLink.getOrElse(List.empty))(adaptLayerToLink)
+          adaptedRequestBody = ReserveUploadInformation(
+            uploadId = request.body.uploadId,
+            name = request.body.name,
+            organization = request.body.organization,
+            totalFileCount = request.body.totalFileCount,
+            filePaths = request.body.filePaths,
+            totalFileSizeInBytes = request.body.totalFileSizeInBytes,
+            layersToLink = Some(adaptedLayersToLink),
+            initialTeams = request.body.initialTeams,
+            folderId = request.body.folderId,
+            requireUniqueName = request.body.requireUniqueName,
+            isVirtual = None
+          )
+          result <- Fox.fromFuture(dataSourceController.reserveUpload()(request.withBody(adaptedRequestBody)))
+        } yield result
+      }
+    }
+
+  private def adaptLayerToLink(legacyLayerToLink: LegacyLinkedLayerIdentifier): Fox[LinkedLayerIdentifier] =
+    for {
+      datasetId <- remoteWebknossosClient.getDatasetId(legacyLayerToLink.getOrganizationId,
+                                                       legacyLayerToLink.dataSetName)
+    } yield LinkedLayerIdentifier(datasetId, legacyLayerToLink.layerName, legacyLayerToLink.newLayerName)
 
   // To be called by people with disk access but not DatasetManager role. This way, they can upload a dataset manually on disk,
   // and it can be put in a webknossos folder where they have access

From 174b9a7144ceda9a8cc234d7a332be61cb13c090 Mon Sep 17 00:00:00 2001
From: Florian M
Date: Thu, 25 Sep 2025 14:44:49 +0200
Subject: [PATCH 24/62] cleanup

---
 app/models/dataset/ComposeService.scala | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/app/models/dataset/ComposeService.scala b/app/models/dataset/ComposeService.scala
index 1a3f2cff017..3b7ff349a5e 100644
--- a/app/models/dataset/ComposeService.scala
+++ b/app/models/dataset/ComposeService.scala
@@ -64,13 +64,9 @@ class ComposeService @Inject()(datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDA
         case Some(c) => Some(c ++ composeLayer.transformations.toList)
         case None    => Some(composeLayer.transformations.toList)
       }
-      editedLayer: StaticLayer <- layer match {
-        case l: StaticLayer =>
-          Fox.successful(
-            l.mapped(name = composeLayer.newName,
-                     coordinateTransformations = applyCoordinateTransformations(l.coordinateTransformations)))
-        case _ => Fox.failure("Unsupported layer type for composition: " + layer.getClass.getSimpleName)
-      }
+      editedLayer = layer.mapped(name = composeLayer.newName,
+                                 coordinateTransformations =
+                                   applyCoordinateTransformations(layer.coordinateTransformations))
     } yield editedLayer
 
   private def isComposable(composeRequest: ComposeRequest)(implicit ctx: DBAccessContext): Fox[Boolean] =

From 328d1cc5074bc75461c59f80821950234632440b Mon Sep 17 00:00:00 2001
From: Florian M
Date: Thu, 25 Sep 2025 14:52:12 +0200
Subject: [PATCH 25/62] format

---
 app/models/dataset/DatasetUploadToPathsService.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/app/models/dataset/DatasetUploadToPathsService.scala b/app/models/dataset/DatasetUploadToPathsService.scala
index 27a01c39d1f..8a0ea2bc160 100644
--- a/app/models/dataset/DatasetUploadToPathsService.scala
+++ b/app/models/dataset/DatasetUploadToPathsService.scala
@@ -64,7 +64,8 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService,
       dataSourceWithPaths <- addPathsToDatasource(dataSourceWithNewDirectoryName,
                                                   organization._id,
                                                   parameters.pathPrefix)
-      dataSourceWithLayersToLink <- layerToLinkService.addLayersToLinkToDataSource(dataSourceWithPaths, parameters.layersToLink)
+      dataSourceWithLayersToLink <- layerToLinkService.addLayersToLinkToDataSource(dataSourceWithPaths,
+                                                                                   parameters.layersToLink)
       _ <- assertValidDataSource(dataSourceWithLayersToLink).toFox
       dataStore <- findReferencedDataStore(parameters.layersToLink)
       dataset <- datasetService.createDataset(
@@ -210,8 +211,6 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService,
       layerPath / defaultDirName / (safeAttachmentName + suffix)
     }
 
-
-
   def reserveAttachmentUploadToPath(dataset: Dataset, parameters: ReserveAttachmentUploadToPathRequest)(
       implicit ec: ExecutionContext,
       mp: MessagesProvider): Fox[UPath] =

From 297459a04fcbedf025378c9cc4897e23dae982fb Mon Sep 17 00:00:00 2001
From: Florian M
Date: Thu, 25 Sep 2025 14:52:25 +0200
Subject: [PATCH 26/62] reset application.conf

---
 conf/application.conf | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/conf/application.conf b/conf/application.conf
index 18d9e5b02fd..0135bce1b90 100644
--- a/conf/application.conf
+++ b/conf/application.conf
@@ -156,8 +156,8 @@ features {
   taskReopenAllowedInSeconds = 30
   allowDeleteDatasets = true
   # to enable jobs for local development, use "yarn enable-jobs" to also activate it in the database
-  jobsEnabled =
true - voxelyticsEnabled = true + jobsEnabled = false + voxelyticsEnabled = false neuronInferralCostPerGVx = 1 mitochondriaInferralCostPerGVx = 0.5 alignmentCostPerGVx = 0.5 @@ -246,12 +246,6 @@ datastore { # The credentials are selected by uri prefix, so different s3 uri styles may need duplicated credential entries. credentials = [] } - s3Upload { - enabled = false - # Use the name of a credential in the dataVaults section here to use it for uploads. - credentialName = "s3://example/uri/prefix" - objectKeyPrefix = "webknossos-uploads" - } } # Redirect some routes to prefix + route (only if features.isWkorgInstance, route "/" only if logged out) From a509ceba4f160452bb55a00bb5117b1a8c36eb8d Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 25 Sep 2025 14:53:15 +0200 Subject: [PATCH 27/62] re-add s3Upload block in application.conf --- conf/application.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/application.conf b/conf/application.conf index 0135bce1b90..31b561ad56b 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -246,6 +246,12 @@ datastore { # The credentials are selected by uri prefix, so different s3 uri styles may need duplicated credential entries. credentials = [] } + s3Upload { + enabled = false + # Use the name of a credential in the dataVaults section here to use it for uploads. + credentialName = "s3://example/uri/prefix" + objectKeyPrefix = "webknossos-uploads" + } } # Redirect some routes to prefix + route (only if features.isWkorgInstance, route "/" only if logged out) From 32ead3309b20f3120b024a146d4ecd8b130b285d Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 29 Sep 2025 11:23:58 +0200 Subject: [PATCH 28/62] improve compatibility with worker convert jobs --- app/controllers/DatasetController.scala | 10 +++-- .../WKRemoteDataStoreController.scala | 14 ++++--- app/models/dataset/ComposeService.scala | 11 +++--- app/models/dataset/Dataset.scala | 6 +++ app/models/dataset/DatasetService.scala | 37 ++++++++++--------- .../dataset/DatasetUploadToPathsService.scala | 1 + .../admin/dataset/dataset_upload_view.tsx | 1 + .../controllers/DSLegacyApiController.scala | 6 ++- .../services/uploading/UploadService.scala | 3 +- 9 files changed, 54 insertions(+), 35 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index 75e56cf222b..253bade45d6 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -205,12 +205,13 @@ class DatasetController @Inject()(userService: UserService, folderService.getOrCreateFromPathLiteral(folderPath, request.identity._organization)) ?~> "dataset.explore.autoAdd.getFolder.failed" _ <- datasetService.assertValidDatasetName(request.body.datasetName) _ <- Fox.serialCombined(dataSource.dataLayers)(layer => datasetService.assertValidLayerNameLax(layer.name)) - newDataset <- datasetService.createVirtualDataset( + newDataset <- datasetService.createAndSetUpDataset( request.body.datasetName, dataStore, dataSource, folderIdOpt, - request.identity + request.identity, + isVirtual = true ) ?~> "dataset.explore.autoAdd.failed" } yield Ok(Json.toJson(newDataset._id)) } @@ -226,12 +227,13 @@ class DatasetController @Inject()(userService: UserService, _ <- Fox.fromBool(isTeamManagerOrAdmin || user.isDatasetManager) ~> FORBIDDEN _ <- Fox.fromBool(request.body.dataSource.dataLayers.nonEmpty) ?~> "dataset.explore.zeroLayers" _ <- datasetService.validatePaths(request.body.dataSource.allExplicitPaths, dataStore) ?~> 
"dataSource.add.pathsNotAllowed" - dataset <- datasetService.createVirtualDataset( + dataset <- datasetService.createAndSetUpDataset( name, dataStore, request.body.dataSource, request.body.folderId, - user + user, + isVirtual = true ) _ <- datasetService.trackNewDataset(dataset, user, diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index de203bcf02a..2f9369714d0 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -78,11 +78,15 @@ class WKRemoteDataStoreController @Inject()( _ <- Fox.runIf(request.body.requireUniqueName.getOrElse(false))( datasetService.assertNewDatasetNameUnique(request.body.name, organization._id)) preliminaryDataSource = UnusableDataSource(DataSourceId("", ""), None, DataSourceStatus.notYetUploaded) - dataset <- datasetService.createVirtualDataset(uploadInfo.name, - dataStore, - preliminaryDataSource, - uploadInfo.folderId, - user) ?~> "dataset.upload.creation.failed" + dataset <- datasetService.createAndSetUpDataset( + uploadInfo.name, + dataStore, + preliminaryDataSource, + uploadInfo.folderId, + user, + // For the moment, the convert_to_wkw job can only fill the dataset if it is not virtual. + isVirtual = !uploadInfo.needsConversion.getOrElse(false) + ) ?~> "dataset.upload.creation.failed" _ <- datasetService.addInitialTeams(dataset, uploadInfo.initialTeams, user)(AuthorizedAccessContext(user)) additionalInfo = ReserveAdditionalInformation(dataset._id, dataset.directoryName) } yield Ok(Json.toJson(additionalInfo)) diff --git a/app/models/dataset/ComposeService.scala b/app/models/dataset/ComposeService.scala index 3b7ff349a5e..c92a69a448a 100644 --- a/app/models/dataset/ComposeService.scala +++ b/app/models/dataset/ComposeService.scala @@ -45,11 +45,12 @@ class ComposeService @Inject()(datasetDAO: DatasetDAO, dataStoreDAO: DataStoreDA _ <- Fox.assertTrue(isComposable(composeRequest)) ?~> "Datasets are not composable, they are not on the same data store" dataSource <- createDatasource(composeRequest, composeRequest.newDatasetName, composeRequest.organizationId) dataStore <- dataStoreDAO.findOneWithUploadsAllowed - dataset <- datasetService.createVirtualDataset(composeRequest.newDatasetName, - dataStore, - dataSource, - Some(composeRequest.targetFolderId), - user) + dataset <- datasetService.createAndSetUpDataset(composeRequest.newDatasetName, + dataStore, + dataSource, + Some(composeRequest.targetFolderId), + user, + isVirtual = true) } yield (dataSource, dataset._id) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 15f13c9d4ca..8c7b8842a10 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -663,6 +663,12 @@ class DatasetDAO @Inject()(sqlClient: SqlClient, datasetLayerDAO: DatasetLayerDA WHERE _id = $id""".asUpdate) } yield () + def makeVirtual(datasetId: ObjectId)(implicit ctx: DBAccessContext): Fox[Unit] = + for { + _ <- assertUpdateAccess(datasetId) + _ <- run(q"UPDATE webknossos.datasets SET isVirtual = ${true} WHERE _id = $datasetId".asUpdate) + } yield () + def deactivateUnreported(existingDatasetIds: List[ObjectId], dataStoreName: String, organizationId: Option[String], diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index fd8f3bb3e35..70d091798d1 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -85,11 +85,24 @@ class DatasetService 
@Inject()(organizationDAO: OrganizationDAO, _ <- Fox.fromBool(!isDatasetNameAlreadyTaken) ?~> "dataset.name.alreadyTaken" } yield () - def createVirtualDataset(datasetName: String, - dataStore: DataStore, - dataSource: DataSource, - folderId: Option[ObjectId], - user: User): Fox[Dataset] = + def getAllUnfinishedDatasetUploadsOfUser(userId: ObjectId, organizationId: String)( + implicit ctx: DBAccessContext): Fox[List[DatasetCompactInfo]] = + datasetDAO.findAllCompactWithSearch( + uploaderIdOpt = Some(userId), + organizationIdOpt = Some(organizationId), + isActiveOpt = Some(false), + includeSubfolders = true, + statusOpt = Some(DataSourceStatus.notYetUploaded), + // Only list pending uploads since the two last weeks. + createdSinceOpt = Some(Instant.now - (14 days)) + ) ?~> "dataset.list.fetchFailed" + + def createAndSetUpDataset(datasetName: String, + dataStore: DataStore, + dataSource: DataSource, + folderId: Option[ObjectId], + user: User, + isVirtual: Boolean): Fox[Dataset] = for { _ <- assertValidDatasetName(datasetName) organization <- organizationDAO.findOne(user._organization)(GlobalAccessContext) ?~> "organization.notFound" @@ -101,24 +114,12 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, newDatasetId, datasetName, dataSource.withUpdatedId(DataSourceId(directoryName, organization._id)), - isVirtual = true) + isVirtual = isVirtual) datasetId = dataset._id _ <- datasetDAO.updateFolder(datasetId, folderIdWithFallback)(GlobalAccessContext) _ <- addUploader(dataset, user._id)(GlobalAccessContext) } yield dataset - def getAllUnfinishedDatasetUploadsOfUser(userId: ObjectId, organizationId: String)( - implicit ctx: DBAccessContext): Fox[List[DatasetCompactInfo]] = - datasetDAO.findAllCompactWithSearch( - uploaderIdOpt = Some(userId), - organizationIdOpt = Some(organizationId), - isActiveOpt = Some(false), - includeSubfolders = true, - statusOpt = Some(DataSourceStatus.notYetUploaded), - // Only list pending uploads since the two last weeks. 
- createdSinceOpt = Some(Instant.now - (14 days)) - ) ?~> "dataset.list.fetchFailed" - def createDataset( dataStore: DataStore, datasetId: ObjectId, diff --git a/app/models/dataset/DatasetUploadToPathsService.scala b/app/models/dataset/DatasetUploadToPathsService.scala index 8a0ea2bc160..a9cc26fa77f 100644 --- a/app/models/dataset/DatasetUploadToPathsService.scala +++ b/app/models/dataset/DatasetUploadToPathsService.scala @@ -97,6 +97,7 @@ class DatasetUploadToPathsService @Inject()(datasetService: DatasetService, requestingUser._organization, parameters.pathPrefix) _ <- assertValidDataSource(dataSourceWithPaths).toFox + _ <- datasetDAO.makeVirtual(dataset._id) _ <- datasetDAO.updateDataSource(dataset._id, dataset._dataStore, dataSourceWithPaths.hashCode(), diff --git a/frontend/javascripts/admin/dataset/dataset_upload_view.tsx b/frontend/javascripts/admin/dataset/dataset_upload_view.tsx index 0bc30df7729..fef2ade942d 100644 --- a/frontend/javascripts/admin/dataset/dataset_upload_view.tsx +++ b/frontend/javascripts/admin/dataset/dataset_upload_view.tsx @@ -343,6 +343,7 @@ class DatasetUploadView extends React.Component { layersToLink: [], initialTeams: formValues.initialTeams.map((team: APITeam) => team.id), folderId: formValues.targetFolderId, + needsConversion: this.state.needsConversion, }; const datastoreUrl = formValues.datastoreUrl; await reserveDatasetUpload(datastoreUrl, reserveUploadInformation); diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala index dd7f5a2efef..694d73e98a2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala @@ -108,7 +108,8 @@ class DSLegacyApiController @Inject()( initialTeams = request.body.initialTeams, folderId = request.body.folderId, requireUniqueName = request.body.requireUniqueName, - isVirtual = None + isVirtual = None, + needsConversion = None ) result <- Fox.fromFuture(dataSourceController.reserveUpload()(request.withBody(adaptedRequestBody))) } yield result @@ -140,7 +141,8 @@ class DSLegacyApiController @Inject()( request.body.initialTeamIds, request.body.folderId, Some(request.body.requireUniqueName), - None + None, + needsConversion = None ) ) ?~> "dataset.upload.validation.failed" } yield diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 8b666a17340..48ddadbe59d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -54,7 +54,8 @@ case class ReserveUploadInformation( initialTeams: List[ObjectId], // team ids folderId: Option[ObjectId], requireUniqueName: Option[Boolean], - isVirtual: Option[Boolean] // Only set (to false) for legacy manual uploads + isVirtual: Option[Boolean], // Only set (to false) for legacy manual uploads + needsConversion: Option[Boolean] // None means false ) object ReserveUploadInformation { implicit val jsonFormat: OFormat[ReserveUploadInformation] = Json.format[ReserveUploadInformation] From b48966d00fcc7aaf6a5cad33dfa864338d5cadbf Mon 
Sep 17 00:00:00 2001 From: Florian M Date: Mon, 29 Sep 2025 12:02:03 +0200 Subject: [PATCH 29/62] unify logging --- app/controllers/DatasetController.scala | 20 ++++++++-------- .../WKRemoteDataStoreController.scala | 10 ++++---- .../services/uploading/UploadService.scala | 24 +++++++++++++------ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index 253bade45d6..eff38f5a492 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -235,11 +235,11 @@ class DatasetController @Inject()(userService: UserService, user, isVirtual = true ) - _ <- datasetService.trackNewDataset(dataset, - user, - needsConversion = false, - datasetSizeBytes = 0, - viaAddRoute = false) + _ = datasetService.trackNewDataset(dataset, + user, + needsConversion = false, + datasetSizeBytes = 0, + viaAddRoute = false) } yield Ok(Json.obj("newDatasetId" -> dataset._id)) } @@ -659,11 +659,11 @@ class DatasetController @Inject()(userService: UserService, _ <- Fox.fromBool(!dataset.isUsable) ?~> s"Dataset is already marked as usable." _ <- datasetDAO.updateDatasetStatusByDatasetId(datasetId, newStatus = "", isUsable = true) _ <- usedStorageService.refreshStorageReportForDataset(dataset) - _ <- datasetService.trackNewDataset(dataset, - request.identity, - needsConversion = false, - datasetSizeBytes = 0, - viaAddRoute = false) + _ = datasetService.trackNewDataset(dataset, + request.identity, + needsConversion = false, + datasetSizeBytes = 0, + viaAddRoute = false) } yield Ok } diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 2f9369714d0..bb433c5ed9c 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -132,11 +132,11 @@ class WKRemoteDataStoreController @Inject()( user <- bearerTokenService.userForToken(token) dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ?~> Messages("dataset.notFound", datasetId) ~> NOT_FOUND _ <- Fox.runIf(!request.body.needsConversion)(usedStorageService.refreshStorageReportForDataset(dataset)) - _ <- datasetService.trackNewDataset(dataset, - user, - request.body.needsConversion, - request.body.datasetSizeBytes, - viaAddRoute = false) + _ = datasetService.trackNewDataset(dataset, + user, + request.body.needsConversion, + request.body.datasetSizeBytes, + viaAddRoute = false) dataSourceWithLinkedLayersOpt <- Fox.runOptional(request.body.dataSourceOpt) { implicit val ctx: DBAccessContext = AuthorizedAccessContext(user) layerToLinkService.addLayersToLinkToDataSource(_, request.body.layersToLink) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 48ddadbe59d..ea31a2d09df 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -159,6 +159,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, private def uploadDirectoryFor(organizationId: String, uploadId: String): Path = dataBaseDir.resolve(organizationId).resolve(uploadingDir).resolve(uploadId) + private def uploadBackupDirectoryFor(organizationId: String, uploadId: String): Path = + 
dataBaseDir.resolve(organizationId).resolve(trashDir).resolve(s"uploadBackup__$uploadId") + private def getDataSourceIdByUploadId(uploadId: String): Fox[DataSourceId] = getObjectFromRedis[DataSourceId](redisKeyForDataSourceId(uploadId)) @@ -200,7 +203,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, Json.stringify(Json.toJson(LinkedLayerIdentifiers(reserveUploadInfo.layersToLink))) ) _ = logger.info( - f"Reserving dataset upload ${reserveUploadInfo.uploadId} for dataset ${reserveUploadAdditionalInfo.newDatasetId} ($newDataSourceId)...") + f"Reserving ${uploadFullName(reserveUploadInfo.uploadId, reserveUploadAdditionalInfo.newDatasetId, newDataSourceId)}...") } yield () def addUploadIdsToUnfinishedUploads( @@ -263,6 +266,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, val uploadId = extractDatasetUploadId(uploadFileId) for { datasetId <- getDatasetIdByUploadId(uploadId) + dataSourceId <- getDataSourceIdByUploadId(uploadId) (filePath, uploadDir) <- getFilePathAndDirOfUploadId(uploadFileId) isFileKnown <- runningUploadMetadataStore.contains(redisKeyForFileChunkCount(uploadId, filePath)) totalFileSizeInBytesOpt <- runningUploadMetadataStore.findLong(redisKeyForTotalFileSizeInBytes(uploadId)) @@ -304,7 +308,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, runningUploadMetadataStore.removeFromSet(redisKeyForFileChunkSet(uploadId, filePath), String.valueOf(currentChunkNumber)) val errorMsg = - s"Error receiving chunk $currentChunkNumber for uploadId $uploadId (datasetId $datasetId): ${e.getMessage}" + s"Error receiving chunk $currentChunkNumber for ${uploadFullName(uploadId, datasetId, dataSourceId)}: ${e.getMessage}" logger.warn(errorMsg) Fox.failure(errorMsg) } @@ -319,7 +323,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, knownUpload <- isKnownUpload(uploadId) } yield if (knownUpload) { - logger.info(f"Cancelling dataset upload of uploadId $uploadId (datasetId $datasetId)...") + logger.info(f"Cancelling ${uploadFullName(uploadId, datasetId, dataSourceId)}...") for { _ <- removeFromRedis(uploadId) _ <- PathUtils.deleteDirectoryRecursively(uploadDirectoryFor(dataSourceId.organizationId, uploadId)).toFox @@ -327,6 +331,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else Fox.failure(s"Unknown upload") } + private def uploadFullName(uploadId: String, datasetId: ObjectId, dataSourceId: DataSourceId) = + s"upload $uploadId of dataset $datasetId ($dataSourceId)" + private def assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir: Path, uploadId: String): Fox[Unit] = for { totalFileSizeInBytesOpt <- runningUploadMetadataStore.find(redisKeyForTotalFileSizeInBytes(uploadId)) @@ -346,15 +353,15 @@ class UploadService @Inject()(dataSourceService: DataSourceService, dataSourceId <- getDataSourceIdByUploadId(uploadId) datasetId <- getDatasetIdByUploadId(uploadId) linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) - _ = logger.info(s"Finishing dataset upload $uploadId of datasetId $datasetId ($dataSourceId)...") + _ = logger.info(s"Finishing ${uploadFullName(uploadId, datasetId, dataSourceId)}...") _ <- Fox.fromBool( !uploadInformation.needsConversion.getOrElse(false) || !linkedLayerIdentifiers.layersToLink .exists(_.nonEmpty)) ?~> "Cannot use linked layers if the dataset needs conversion" needsConversion = uploadInformation.needsConversion.getOrElse(false) uploadDir = uploadDirectoryFor(dataSourceId.organizationId, 
uploadId) + _ <- backupRawUploadedData(uploadDir, uploadBackupDirectoryFor(dataSourceId.organizationId, uploadId)).toFox _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) _ <- checkAllChunksUploaded(uploadId) - unpackToDir = unpackToDirFor(dataSourceId) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox @@ -392,7 +399,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, logger.info(s"Uploaded dataset contains files not referenced in the datasource. Deleting $filesToDelete...") _ = filesToDelete.foreach(file => { try { - // TODO move to trash instead? Files.deleteIfExists(file) } catch { case e: Exception => @@ -425,7 +431,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${dataSourceId.organizationId}/${dataSourceId.directoryName}/" _ <- uploadDirectoryToS3(unpackedDir, s3UploadBucket, s3ObjectKey) _ = Instant.logSince(beforeS3Upload, - s"Forwarding of uploaded of dataset $datasetId ($dataSourceId) to S3", + s"Forwarding of uploaded dataset $datasetId ($dataSourceId) to S3", logger) endPointHost = new URI(dataStoreConfig.Datastore.S3Upload.credentialName).getHost newBasePath <- UPath.fromString(s"s3://$endPointHost/$s3UploadBucket/$s3ObjectKey").toFox @@ -793,6 +799,10 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } } yield () + private def backupRawUploadedData(uploadDir: Path, backupDir: Path): Box[Unit] = + // Backed up within .trash (old files regularly deleted by cronjob) + tryo(FileUtils.copyDirectory(uploadDir.toFile, backupDir.toFile)) + private def cleanUpUploadedDataset(uploadDir: Path, uploadId: String): Fox[Unit] = { this.synchronized { PathUtils.deleteDirectoryRecursively(uploadDir) From 4bfb9a225cf601fccc3c5f89bc57de92217dcb12 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 29 Sep 2025 13:50:58 +0200 Subject: [PATCH 30/62] handle legacy layersToLink with dataset id --- app/controllers/WKRemoteDataStoreController.scala | 2 +- app/models/dataset/Dataset.scala | 7 ++++--- .../controllers/DSLegacyApiController.scala | 14 +++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index bb433c5ed9c..5737958beb8 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -237,7 +237,7 @@ class WKRemoteDataStoreController @Inject()( "organization.notFound", organizationId) ~> NOT_FOUND dataset <- datasetDAO.findOneByNameAndOrganization(datasetDirectoryName, organization._id)( - GlobalAccessContext) + GlobalAccessContext) ?~> Messages("dataset.notFound", datasetDirectoryName) } yield Ok(Json.toJson(dataset._id)) } } diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 8c7b8842a10..6780e31dbd1 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -443,17 +443,18 @@ class DatasetDAO @Inject()(sqlClient: SqlClient, datasetLayerDAO: DatasetLayerDA // Legacy links to Datasets used their name and organizationId as identifier. In #8075 name was changed to directoryName. // Thus, interpreting the name as the directory name should work, as changing the directory name is not possible. // This way of looking up datasets should only be used for backwards compatibility. 
- def findOneByNameAndOrganization(name: String, organizationId: String)(implicit ctx: DBAccessContext): Fox[Dataset] = + def findOneByNameAndOrganization(directoryName: String, organizationId: String)( + implicit ctx: DBAccessContext): Fox[Dataset] = for { accessQuery <- readAccessQuery r <- run(q"""SELECT $columns FROM $existingCollectionName - WHERE (directoryName = $name) + WHERE (directoryName = $directoryName) AND _organization = $organizationId AND $accessQuery ORDER BY created ASC LIMIT 1""".as[DatasetsRow]) - parsed <- parseFirst(r, s"$organizationId/$name") + parsed <- parseFirst(r, s"$organizationId/$directoryName") } yield parsed def findOneByIdOrNameAndOrganization(datasetIdOpt: Option[ObjectId], datasetName: String, organizationId: String)( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala index 694d73e98a2..7415a8c66d3 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DSLegacyApiController.scala @@ -116,11 +116,19 @@ class DSLegacyApiController @Inject()( } } - private def adaptLayerToLink(legacyLayerToLink: LegacyLinkedLayerIdentifier): Fox[LinkedLayerIdentifier] = + private def adaptLayerToLink(legacyLayerToLink: LegacyLinkedLayerIdentifier): Fox[LinkedLayerIdentifier] = { + val asObjectIdOpt = ObjectId.fromStringSync(legacyLayerToLink.dataSetName) for { - datasetId <- remoteWebknossosClient.getDatasetId(legacyLayerToLink.getOrganizationId, - legacyLayerToLink.dataSetName) + datasetId <- asObjectIdOpt match { + case Some(asObjectId) => + // Client already used datasetId in the dataSetName field. The libs did this for a while. + Fox.successful(asObjectId) + case None => + // dataSetName is not an objectId. Assume directoryName. Resolve with remoteWebknossosClient. + remoteWebknossosClient.getDatasetId(legacyLayerToLink.getOrganizationId, legacyLayerToLink.dataSetName) + } } yield LinkedLayerIdentifier(datasetId, legacyLayerToLink.layerName, legacyLayerToLink.newLayerName) + } // To be called by people with disk access but not DatasetManager role. This way, they can upload a dataset manually on disk, // and it can be put in a webknossos folder where they have access From 2aebc3497affc6de63adcb7921f52bb5f7970706 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 2 Oct 2025 14:41:26 +0200 Subject: [PATCH 31/62] implement pr feedback --- unreleased_changes/8912.md | 2 +- .../datastore/services/uploading/UploadService.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/unreleased_changes/8912.md b/unreleased_changes/8912.md index f770b2e6f34..6d7451f99fe 100644 --- a/unreleased_changes/8912.md +++ b/unreleased_changes/8912.md @@ -2,7 +2,7 @@ - Datasets can be uploaded to S3-compatible object storage services. This is disabled by default. ### Migration -- New keys have been added to the application.conf of the data store. +- New keys have been added to the application.conf of the datastore. 
Add these lines in the "datastore" section to enable S3 upload: ``` diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index ea31a2d09df..28f3fca1dc9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -172,6 +172,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, reserveUploadAdditionalInfo: ReserveAdditionalInformation): Fox[Unit] = for { _ <- dataSourceService.assertDataDirWritable(reserveUploadInfo.organization) + _ <- Fox.fromBool( + !reserveUploadInfo.needsConversion.getOrElse(false) || !reserveUploadInfo.layersToLink + .exists(_.nonEmpty)) ?~> "Cannot use linked layers if the dataset needs conversion" _ <- runningUploadMetadataStore.insert(redisKeyForFileCount(reserveUploadInfo.uploadId), String.valueOf(reserveUploadInfo.totalFileCount)) _ <- Fox.runOptional(reserveUploadInfo.totalFileSizeInBytes) { fileSize => @@ -354,9 +357,6 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetId <- getDatasetIdByUploadId(uploadId) linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) _ = logger.info(s"Finishing ${uploadFullName(uploadId, datasetId, dataSourceId)}...") - _ <- Fox.fromBool( - !uploadInformation.needsConversion.getOrElse(false) || !linkedLayerIdentifiers.layersToLink - .exists(_.nonEmpty)) ?~> "Cannot use linked layers if the dataset needs conversion" needsConversion = uploadInformation.needsConversion.getOrElse(false) uploadDir = uploadDirectoryFor(dataSourceId.organizationId, uploadId) _ <- backupRawUploadedData(uploadDir, uploadBackupDirectoryFor(dataSourceId.organizationId, uploadId)).toFox From 8dab1241033adc099ec5faacc8065f33e7274191 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 6 Oct 2025 09:51:19 +0200 Subject: [PATCH 32/62] remove this.synchronized around UploadService file operations --- .../services/uploading/UploadService.scala | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 28f3fca1dc9..0e3e2e6f75a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -298,13 +298,11 @@ class UploadService @Inject()(dataSourceService: DataSourceService, if (isNewChunk) { try { val bytes = Files.readAllBytes(chunkFile.toPath) - this.synchronized { - PathUtils.ensureDirectory(uploadDir.resolve(filePath).getParent) - val tempFile = new RandomAccessFile(uploadDir.resolve(filePath).toFile, "rw") - tempFile.seek((currentChunkNumber - 1) * chunkSize) - tempFile.write(bytes) - tempFile.close() - } + PathUtils.ensureDirectory(uploadDir.resolve(filePath).getParent) + val tempFile = new RandomAccessFile(uploadDir.resolve(filePath).toFile, "rw") + tempFile.seek((currentChunkNumber - 1) * chunkSize) + tempFile.write(bytes) + tempFile.close() Fox.successful(()) } catch { case e: Exception => @@ -444,9 +442,7 @@ class UploadService 
@Inject()(dataSourceService: DataSourceService, } yield UPath.fromLocalPath(finalUploadedLocalPath) } dataSourceWithAdaptedPaths = dataSourceService.resolvePathsInNewBasePath(usableDataSourceFromDir, newBasePath) - _ = this.synchronized { - PathUtils.deleteDirectoryRecursively(unpackedDir) - } + _ = PathUtils.deleteDirectoryRecursively(unpackedDir) } yield Some(dataSourceWithAdaptedPaths) } @@ -804,9 +800,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, tryo(FileUtils.copyDirectory(uploadDir.toFile, backupDir.toFile)) private def cleanUpUploadedDataset(uploadDir: Path, uploadId: String): Fox[Unit] = { - this.synchronized { - PathUtils.deleteDirectoryRecursively(uploadDir) - } + PathUtils.deleteDirectoryRecursively(uploadDir) removeFromRedis(uploadId) } From fe9c527adc4b3895429c33d5f0850ea792f39ebb Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 6 Oct 2025 09:56:19 +0200 Subject: [PATCH 33/62] Revert "remove this.synchronized around UploadService file operations" This reverts commit 8dab1241033adc099ec5faacc8065f33e7274191. --- .../services/uploading/UploadService.scala | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 0e3e2e6f75a..28f3fca1dc9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -298,11 +298,13 @@ class UploadService @Inject()(dataSourceService: DataSourceService, if (isNewChunk) { try { val bytes = Files.readAllBytes(chunkFile.toPath) - PathUtils.ensureDirectory(uploadDir.resolve(filePath).getParent) - val tempFile = new RandomAccessFile(uploadDir.resolve(filePath).toFile, "rw") - tempFile.seek((currentChunkNumber - 1) * chunkSize) - tempFile.write(bytes) - tempFile.close() + this.synchronized { + PathUtils.ensureDirectory(uploadDir.resolve(filePath).getParent) + val tempFile = new RandomAccessFile(uploadDir.resolve(filePath).toFile, "rw") + tempFile.seek((currentChunkNumber - 1) * chunkSize) + tempFile.write(bytes) + tempFile.close() + } Fox.successful(()) } catch { case e: Exception => @@ -442,7 +444,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } yield UPath.fromLocalPath(finalUploadedLocalPath) } dataSourceWithAdaptedPaths = dataSourceService.resolvePathsInNewBasePath(usableDataSourceFromDir, newBasePath) - _ = PathUtils.deleteDirectoryRecursively(unpackedDir) + _ = this.synchronized { + PathUtils.deleteDirectoryRecursively(unpackedDir) + } } yield Some(dataSourceWithAdaptedPaths) } @@ -800,7 +804,9 @@ class UploadService @Inject()(dataSourceService: DataSourceService, tryo(FileUtils.copyDirectory(uploadDir.toFile, backupDir.toFile)) private def cleanUpUploadedDataset(uploadDir: Path, uploadId: String): Fox[Unit] = { - PathUtils.deleteDirectoryRecursively(uploadDir) + this.synchronized { + PathUtils.deleteDirectoryRecursively(uploadDir) + } removeFromRedis(uploadId) } From 9efac9a6d3006f05dd2683aa5bff657f8b7b2108 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 7 Oct 2025 10:54:25 +0200 Subject: [PATCH 34/62] some more logging for finishUpload request --- .../services/uploading/UploadService.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git 
a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 28f3fca1dc9..0ced8e8cf74 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -355,16 +355,16 @@ class UploadService @Inject()(dataSourceService: DataSourceService, for { dataSourceId <- getDataSourceIdByUploadId(uploadId) datasetId <- getDatasetIdByUploadId(uploadId) - linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) _ = logger.info(s"Finishing ${uploadFullName(uploadId, datasetId, dataSourceId)}...") + linkedLayerIdentifiers <- getObjectFromRedis[LinkedLayerIdentifiers](redisKeyForLinkedLayerIdentifier(uploadId)) needsConversion = uploadInformation.needsConversion.getOrElse(false) uploadDir = uploadDirectoryFor(dataSourceId.organizationId, uploadId) - _ <- backupRawUploadedData(uploadDir, uploadBackupDirectoryFor(dataSourceId.organizationId, uploadId)).toFox + _ <- backupRawUploadedData(uploadDir, uploadBackupDirectoryFor(dataSourceId.organizationId, uploadId), datasetId).toFox _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) _ <- checkAllChunksUploaded(uploadId) unpackToDir = unpackToDirFor(dataSourceId) _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" - unpackResult <- unpackDataset(uploadDir, unpackToDir).shiftBox + unpackResult <- unpackDataset(uploadDir, unpackToDir, datasetId).shiftBox _ <- cleanUpUploadedDataset(uploadDir, uploadId) _ <- cleanUpOnFailure(unpackResult, datasetId, @@ -412,6 +412,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, datasetId: ObjectId, dataSourceId: DataSourceId): Fox[Option[UsableDataSource]] = if (needsConversion) { + logger.info(s"finishUpload for $datasetId: Moving data to input dir for worker conversion...") val forConversionPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) for { @@ -427,6 +428,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, newBasePath <- if (dataStoreConfig.Datastore.S3Upload.enabled) { for { s3UploadBucket <- s3UploadBucketOpt.toFox + _ = logger.info(s"finishUpload for $datasetId: Copying data to s3 bucket $s3UploadBucket...") beforeS3Upload = Instant.now s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${dataSourceId.organizationId}/${dataSourceId.directoryName}/" _ <- uploadDirectoryToS3(unpackedDir, s3UploadBucket, s3ObjectKey) @@ -439,6 +441,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } else { val finalUploadedLocalPath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) + logger.info(s"finishUpload for $datasetId: Moving data to final local path $finalUploadedLocalPath...") for { _ <- tryo(FileUtils.moveDirectory(unpackedDir.toFile, finalUploadedLocalPath.toFile)).toFox } yield UPath.fromLocalPath(finalUploadedLocalPath) @@ -768,7 +771,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, pathDepth(oneHeaderWkwPath) == 1 } - private def unpackDataset(uploadDir: Path, unpackToDir: Path): Fox[Unit] = + private def unpackDataset(uploadDir: Path, unpackToDir: Path, datasetId: ObjectId): Fox[Unit] = 
for { shallowFileList <- PathUtils.listFiles(uploadDir, silent = false).toFox excludeFromPrefix = LayerCategory.values.map(_.toString).toList @@ -776,6 +779,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- if (shallowFileList.length == 1 && shallowFileList.headOption.exists( _.toString.toLowerCase.endsWith(".zip"))) { firstFile.toFox.flatMap { file => + logger.info(s"finishUpload for $datasetId: Unzipping dataset...") ZipIO .unzipToDirectory( new File(file.toString), @@ -799,9 +803,11 @@ class UploadService @Inject()(dataSourceService: DataSourceService, } } yield () - private def backupRawUploadedData(uploadDir: Path, backupDir: Path): Box[Unit] = + private def backupRawUploadedData(uploadDir: Path, backupDir: Path, datasetId: ObjectId): Box[Unit] = { + logger.info(s"finishUpload for $datasetId: Backing up raw uploaded data...") // Backed up within .trash (old files regularly deleted by cronjob) tryo(FileUtils.copyDirectory(uploadDir.toFile, backupDir.toFile)) + } private def cleanUpUploadedDataset(uploadDir: Path, uploadId: String): Fox[Unit] = { this.synchronized { From 8d51044649320f156f6eaf085ac09fad60b4b8a3 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 8 Oct 2025 09:40:17 +0200 Subject: [PATCH 35/62] add slow request slack notification for finishUpload --- .../controllers/DataSourceController.scala | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index c7f6db77a47..feb02b967fd 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -186,14 +186,17 @@ class DataSourceController @Inject()( def finishUpload(): Action[UploadInformation] = Action.async(validateJson[UploadInformation]) { implicit request => log(Some(slackNotificationService.noticeFailedFinishUpload)) { - for { - datasetId <- uploadService.getDatasetIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" - response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { - for { - datasetId <- uploadService.finishUpload(request.body) ?~> "dataset.upload.finishFailed" - } yield Ok(Json.obj("newDatasetId" -> datasetId)) - } - } yield response + logTime(slackNotificationService.noticeSlowRequest) { + for { + datasetId <- uploadService + .getDatasetIdByUploadId(request.body.uploadId) ?~> "dataset.upload.validation.failed" + response <- accessTokenService.validateAccessFromTokenContext(UserAccessRequest.writeDataset(datasetId)) { + for { + datasetId <- uploadService.finishUpload(request.body) ?~> "dataset.upload.finishFailed" + } yield Ok(Json.obj("newDatasetId" -> datasetId)) + } + } yield response + } } } From 8d480b2249fab24310bc32bc17831ec54dffda99 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 9 Oct 2025 14:15:05 +0200 Subject: [PATCH 36/62] cleanup --- app/models/dataset/DatasetService.scala | 9 +-------- .../datastore/controllers/DataSourceController.scala | 4 +--- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 2d34d8a1584..c21e1d0839d 100644 --- a/app/models/dataset/DatasetService.scala +++ 
b/app/models/dataset/DatasetService.scala @@ -527,16 +527,9 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, def deleteVirtualOrDiskDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = for { - //_ <- if (dataset.isVirtual) { - // At this point, we should also free space in S3 once implemented. - // Right now, we can just mark the dataset as deleted in the database. - // datasetDAO.deleteDataset(dataset._id, onlyMarkAsDeleted = true) - //} else { - //for { + dataSource <- dataSourceFor(dataset) datastoreClient <- clientFor(dataset) _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" - // } yield () - //} ?~> "dataset.delete.failed" } yield () def generateDirectoryName(datasetName: String, datasetId: ObjectId): String = diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index f5ab2f73d74..d88fa7b984a 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -394,10 +394,9 @@ class DataSourceController @Inject()( } } - // TODO should not be a datastore route, have wk call this here def deleteOnDisk(datasetId: ObjectId): Action[AnyContent] = Action.async { implicit request => - accessTokenService.validateAccessFromTokenContext(UserAccessRequest.deleteDataset(datasetId)) { + accessTokenService.validateAccessFromTokenContext(UserAccessRequest.webknossos) { for { dataSource <- datasetCache.getById(datasetId) ~> NOT_FOUND dataSourceId = dataSource.id @@ -413,7 +412,6 @@ class DataSourceController @Inject()( for { _ <- Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))( dataSourceService.deleteFromControlledS3(dataSource, datasetId)) - _ = logger.warn(s"Tried to delete dataset ${dataSource.id} ($datasetId), but is not present on disk.") } yield () _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) } yield Ok From 3860da323eab04fe91bc992ea7dd5582e8913c86 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 14 Oct 2025 11:23:29 +0200 Subject: [PATCH 37/62] draft --- app/controllers/DatasetController.scala | 6 +++--- app/models/dataset/DatasetService.scala | 9 ++++++++- conf/webknossos.latest.routes | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index eff38f5a492..52889673e39 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -581,14 +581,14 @@ class DatasetController @Inject()(userService: UserService, } } - def deleteOnDisk(datasetId: ObjectId): Action[AnyContent] = + def delete(datasetId: ObjectId): Action[AnyContent] = sil.SecuredAction.async { implicit request => for { dataset <- datasetDAO.findOne(datasetId) ?~> notFoundMessage(datasetId.toString) ~> NOT_FOUND _ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled" _ <- Fox.assertTrue(datasetService.isEditableBy(dataset, Some(request.identity))) ?~> "notAllowed" ~> FORBIDDEN - _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ~> FORBIDDEN - _ <- datasetService.deleteVirtualOrDiskDataset(dataset) + _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ?~> "delete.mustBeOrganizationAdmin" ~> FORBIDDEN + _ <- 
datasetService.deleteDataset(dataset) } yield Ok } diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index c21e1d0839d..cf3f89eccd3 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -525,10 +525,17 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, }) } yield () - def deleteVirtualOrDiskDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = + def deleteDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = for { dataSource <- dataSourceFor(dataset) datastoreClient <- clientFor(dataset) + /* Find paths not used by other datasets (neither as realpath nor as path), delete those + (Caution, what if symlink chains go through this dataset? those won’t be detected as realpaths) + If not virtual: delete on disk + - delete datasource-properties.json + - delete empty folders + Delete in the DB if no annotations reference it + */ _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" } yield () diff --git a/conf/webknossos.latest.routes b/conf/webknossos.latest.routes index 835fb8a87be..c824983e439 100644 --- a/conf/webknossos.latest.routes +++ b/conf/webknossos.latest.routes @@ -104,7 +104,7 @@ POST /datasets/:datasetId/layers/:layer/segmentAnythingMask PUT /datasets/:datasetId/clearThumbnailCache controllers.DatasetController.removeFromThumbnailCache(datasetId: ObjectId) GET /datasets/:datasetName/isValidNewName controllers.DatasetController.isValidNewName(datasetName: String) GET /datasets/:datasetId controllers.DatasetController.read(datasetId: ObjectId, sharingToken: Option[String]) -DELETE /datasets/:datasetId/deleteOnDisk controllers.DatasetController.deleteOnDisk(datasetId: ObjectId) +DELETE /datasets/:datasetId controllers.DatasetController.delete(datasetId: ObjectId) POST /datasets/:datasetId/reserveAttachmentUploadToPath controllers.DatasetController.reserveAttachmentUploadToPath(datasetId: ObjectId) POST /datasets/:datasetId/finishAttachmentUploadToPath controllers.DatasetController.finishAttachmentUploadToPath(datasetId: ObjectId) POST /datasets/:datasetId/reserveUploadToPathsForPreliminary controllers.DatasetController.reserveUploadToPathsForPreliminary(datasetId: ObjectId) From 935b4e9b31a1d4b2832d08d9d60c6423031d5f81 Mon Sep 17 00:00:00 2001 From: Florian M Date: Tue, 14 Oct 2025 13:50:27 +0200 Subject: [PATCH 38/62] fix compiler errors, wip new deletion logic --- app/models/dataset/Dataset.scala | 4 ++ app/models/dataset/DatasetService.scala | 49 ++++++++++---- .../dataset/WKRemoteDataStoreClient.scala | 7 ++ .../datastore/DataStoreModule.scala | 1 + .../controllers/DataSourceController.scala | 4 +- .../webknossos/datastore/rpc/RPCRequest.scala | 6 ++ .../services/DataSourceService.scala | 52 ++++----------- .../datastore/services/ManagedS3Service.scala | 66 +++++++++++++++++++ .../services/uploading/UploadService.scala | 63 ++---------------- 9 files changed, 140 insertions(+), 112 deletions(-) create mode 100644 webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index ba6f5b124f6..88a94b11f4c 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -896,6 +896,8 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte magInfos = rowsToMagInfos(rows) } yield magInfos + def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] 
= ??? // TODO + private def parseMagLocator(row: DatasetMagsRow): Fox[MagLocator] = for { mag <- parseMag(row.mag) @@ -1265,6 +1267,8 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex ${datasetIdOpt.map(datasetId => q"AND ranked._dataset = $datasetId").getOrElse(q"")}; """.as[StorageRelevantDataLayerAttachment]) } yield storageRelevantAttachments.toList + + def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] = ??? // TODO } class DatasetCoordinateTransformationsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionContext) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index cf3f89eccd3..036547a4e3c 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -42,6 +42,7 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, datasetLastUsedTimesDAO: DatasetLastUsedTimesDAO, datasetDataLayerDAO: DatasetLayerDAO, datasetMagsDAO: DatasetMagsDAO, + datasetLayerAttachmentsDAO: DatasetLayerAttachmentsDAO, teamDAO: TeamDAO, folderDAO: FolderDAO, multiUserDAO: MultiUserDAO, @@ -525,19 +526,45 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, }) } yield () + def deleteUnusableDataset(dataset: Dataset): Fox[Unit] = ??? // TODO + def deleteDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = - for { - dataSource <- dataSourceFor(dataset) - datastoreClient <- clientFor(dataset) - /* Find paths not used by other datasets (neither as realpath nor as path), delete those + if (!dataset.isUsable) { + deleteUnusableDataset(dataset) + } else { + for { + + /* Find paths not used by other datasets (neither as realpath nor as path), delete those (Caution, what if symlink chains go through this dataset? 
those won’t be detected as realpaths) - If not virtual: delete on disk - - delete datasource-properties.json - - delete empty folders - Delete in the DB if no annotations reference it - */ - _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" - } yield () + If virtual: + - find paths not used by other datasets (neither as realpath nor as path), delete those + If not virtual: + - for path in paths: + - find datasets with realpaths pointing to those paths + - if no such datasets, + - delete on disk, no rewriting symlinks + - else: + - abort + Delete in the DB if no annotations reference it, otherwise mark as deleted and clear datasource + */ + datastoreClient <- clientFor(dataset) + _ <- if (dataset.isVirtual) { + for { + magPathsUsedOnlyByThisDataset <- datasetMagsDAO.findPathsUsedOnlyByThisDataset(dataset._id) + attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findPathsUsedOnlyByThisDataset( + dataset._id) + pathsUsedOnlyByThisDataset = magPathsUsedOnlyByThisDataset ++ attachmentPathsUsedOnlyByThisDataset + _ <- datastoreClient.deletePaths(pathsUsedOnlyByThisDataset) + } yield () + } else { + for { + _ <- Fox.failure("checks!") + _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" + } yield () + } + _ <- Fox.failure("mark as deleted in the db!") + } yield () + } def generateDirectoryName(datasetName: String, datasetId: ObjectId): String = TextUtils.normalizeStrong(datasetName) match { diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index b3bac271cd3..dba73cc569a 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -126,4 +126,11 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: RPC) extends LazyLoggin .delete() } yield () + def deletePaths(paths: Seq[UPath]): Fox[Unit] = + for { + _ <- rpc(s"${dataStore.url}/data/datasets/deletePaths") // TODO datastore-side + .addQueryString("token" -> RpcTokenHolder.webknossosToken) + .deleteJson(paths) + } yield () + } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala index f5630730bb2..6d619bbecfc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/DataStoreModule.scala @@ -38,6 +38,7 @@ class DataStoreModule extends AbstractModule { bind(classOf[DataStoreConfig]).asEagerSingleton() bind(classOf[DataStoreAccessTokenService]).asEagerSingleton() bind(classOf[ActorSystem]).annotatedWith(Names.named("webknossos-datastore")).toInstance(actorSystem) + bind(classOf[ManagedS3Service]).asEagerSingleton() bind(classOf[UploadService]).asEagerSingleton() bind(classOf[DataSourceService]).asEagerSingleton() bind(classOf[DataVaultService]).asEagerSingleton() diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index d88fa7b984a..8f17d9c67fa 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -410,8 +410,8 @@ class DataSourceController @Inject()( } yield () } else for { - _ <- 
Fox.runIf(dataSourceService.datasetInControlledS3(dataSource))( - dataSourceService.deleteFromControlledS3(dataSource, datasetId)) + _ <- Fox.runIf(dataSourceService.datasetIsInManagedS3(dataSource))( + dataSourceService.deleteFromManagedS3(dataSource, datasetId)) } yield () _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) } yield Ok diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/rpc/RPCRequest.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/rpc/RPCRequest.scala index 4621c9d3f08..729977788bf 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/rpc/RPCRequest.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/rpc/RPCRequest.scala @@ -198,6 +198,12 @@ class RPCRequest(val id: Int, val url: String, wsClient: WSClient)(implicit ec: performRequest } + def deleteJson[T: Writes](body: T): Fox[WSResponse] = { + request = + request.addHttpHeaders(HeaderNames.CONTENT_TYPE -> jsonMimeType).withBody(Json.toJson(body)).withMethod("DELETE") + performRequest + } + def delete(): Fox[WSResponse] = { request = request.withMethod("DELETE") performRequest diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index 18015ab0908..f862cef971f 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -11,20 +11,13 @@ import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingProvider} import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, MagLinkInfo, UPath} import com.scalableminds.webknossos.datastore.models.datasource._ -import com.scalableminds.webknossos.datastore.storage.{ - CredentialConfigReader, - RemoteSourceDescriptorService, - S3AccessKeyCredential -} +import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools._ import com.scalableminds.webknossos.datastore.datavault.S3DataVault import play.api.inject.ApplicationLifecycle import play.api.libs.json.Json -import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} -import software.amazon.awssdk.core.checksums.RequestChecksumCalculation -import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.S3AsyncClient import software.amazon.awssdk.services.s3.model.{ Delete, @@ -41,11 +34,11 @@ import scala.concurrent.ExecutionContext import scala.concurrent.duration._ import scala.jdk.CollectionConverters._ import scala.jdk.FutureConverters._ -import scala.io.Source class DataSourceService @Inject()( config: DataStoreConfig, remoteSourceDescriptorService: RemoteSourceDescriptorService, + managedS3Service: ManagedS3Service, val remoteWebknossosClient: DSRemoteWebknossosClient, val lifecycle: ApplicationLifecycle, @Named("webknossos-datastore") val actorSystem: ActorSystem @@ -321,6 +314,7 @@ class DataSourceService @Inject()( } yield dataLayer.mags.length } yield removedEntriesList.sum + // TODO move to ManagedS3Service private lazy val globalCredentials = { val res = 
config.Datastore.DataVaults.credentials.flatMap { credentialConfig => new CredentialConfigReader(credentialConfig).getCredential @@ -329,7 +323,7 @@ class DataSourceService @Inject()( res } - def datasetInControlledS3(dataSource: UsableDataSource): Boolean = { + def datasetIsInManagedS3(dataSource: UsableDataSource): Boolean = { def commonPrefix(strings: Seq[String]): String = { if (strings.isEmpty) return "" @@ -344,33 +338,8 @@ class DataSourceService @Inject()( matchingCredentials.nonEmpty && sharedPath.startsWith("s3") } - private lazy val s3UploadCredentialsOpt: Option[(String, String)] = - config.Datastore.DataVaults.credentials.flatMap { credentialConfig => - new CredentialConfigReader(credentialConfig).getCredential - }.collectFirst { - case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) - if config.Datastore.S3Upload.credentialName == credentialName => - (accessKeyId, secretAccessKey) - } - private lazy val s3Client: S3AsyncClient = S3AsyncClient - .builder() - .credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.builder - .accessKeyId(s3UploadCredentialsOpt.getOrElse(("", ""))._1) - .secretAccessKey(s3UploadCredentialsOpt.getOrElse(("", ""))._2) - .build() - )) - .crossRegionAccessEnabled(true) - .forcePathStyle(true) - .endpointOverride(new URI(config.Datastore.S3Upload.endpoint)) - .region(Region.US_EAST_1) - // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". - .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) - .build() - - def deleteFromControlledS3(dataSource: UsableDataSource, datasetId: ObjectId): Fox[Unit] = { - def deleteBatch(bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = + def deleteFromManagedS3(dataSource: UsableDataSource, datasetId: ObjectId): Fox[Unit] = { + def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = if (keys.isEmpty) Fox.empty else { Fox.fromFuture( @@ -392,7 +361,7 @@ class DataSourceService @Inject()( .asScala) } - def listKeysAtPrefix(bucket: String, prefix: String): Fox[Seq[String]] = { + def listKeysAtPrefix(s3Client: S3AsyncClient, bucket: String, prefix: String): Fox[Seq[String]] = { def listRec(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) val request = continuationToken match { @@ -416,6 +385,7 @@ class DataSourceService @Inject()( for { _ <- Fox.successful(()) layersAndLinkedMags <- remoteWebknossosClient.fetchPaths(datasetId) + s3Client <- managedS3Service.s3ClientBox.toFox magsLinkedByOtherDatasets: Set[MagLinkInfo] = layersAndLinkedMags .flatMap(layerInfo => layerInfo.magLinkInfos.filter(_.linkedMags.nonEmpty)) .toSet @@ -429,11 +399,11 @@ class DataSourceService @Inject()( .hostBucketFromUri(new URI(firstPath.toString)) .toFox ?~> s"Could not determine S3 bucket from path $firstPath" prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(new URI(path.toString)).toFox)) - keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(bucket, _)).map(_.flatten) + keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) uniqueKeys = keys.distinct _ = logger.info( - s"Deleting ${uniqueKeys.length} objects from controlled S3 bucket $bucket for dataset ${dataSource.id}") - _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(bucket, _)).map(_ => ()) + 
s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket for dataset ${dataSource.id}") + _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, bucket, _)).map(_ => ()) } yield () }) } yield () diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala new file mode 100644 index 00000000000..52e0ecfe608 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala @@ -0,0 +1,66 @@ +package com.scalableminds.webknossos.datastore.services + +import com.scalableminds.util.tools.Box +import com.scalableminds.util.tools.Box.tryo +import com.scalableminds.webknossos.datastore.DataStoreConfig +import com.scalableminds.webknossos.datastore.datavault.S3DataVault +import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, S3AccessKeyCredential} +import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} +import software.amazon.awssdk.core.checksums.RequestChecksumCalculation +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3AsyncClient +import software.amazon.awssdk.transfer.s3.S3TransferManager + +import java.net.URI +import javax.inject.Inject + +class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) { + + private lazy val s3UploadCredentialsOpt: Option[(String, String)] = + dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + }.collectFirst { + case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) + if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => + (accessKeyId, secretAccessKey) + } + + lazy val s3UploadBucketOpt: Option[String] = + S3DataVault.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName)) + + private lazy val s3UploadEndpoint: URI = { + val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName) + new URI( + "https", + null, + credentialUri.getHost, + -1, + null, + null, + null + ) + } + + lazy val s3ClientBox: Box[S3AsyncClient] = for { + accessKeyId <- Box(s3UploadCredentialsOpt.map(_._1)) + secretAccessKey <- Box(s3UploadCredentialsOpt.map(_._2)) + client <- tryo( + S3AsyncClient + .builder() + .credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.builder.accessKeyId(accessKeyId).secretAccessKey(secretAccessKey).build() + )) + .crossRegionAccessEnabled(true) + .forcePathStyle(true) + .endpointOverride(s3UploadEndpoint) + .region(Region.US_EAST_1) + // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". 
+ .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) + .build()) + } yield client + + lazy val transferManagerBox: Box[S3TransferManager] = for { + client <- s3ClientBox + } yield S3TransferManager.builder().s3Client(client).build() + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index b1b6c23c2cb..5db64024f80 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -14,27 +14,16 @@ import com.scalableminds.webknossos.datastore.datareaders.n5.{N5Header, N5Metada import com.scalableminds.webknossos.datastore.datareaders.precomputed.PrecomputedHeader.FILENAME_INFO import com.scalableminds.webknossos.datastore.datareaders.zarr.NgffMetadata.FILENAME_DOT_ZATTRS import com.scalableminds.webknossos.datastore.datareaders.zarr.ZarrHeader.FILENAME_DOT_ZARRAY -import com.scalableminds.webknossos.datastore.datavault.S3DataVault import com.scalableminds.webknossos.datastore.explore.ExploreLocalLayerService import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, DirectoryConstants, UPath} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload import com.scalableminds.webknossos.datastore.models.datasource.UsableDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON import com.scalableminds.webknossos.datastore.models.datasource._ -import com.scalableminds.webknossos.datastore.services.{DSRemoteWebknossosClient, DataSourceService} -import com.scalableminds.webknossos.datastore.storage.{ - CredentialConfigReader, - DataStoreRedisStore, - RemoteSourceDescriptorService, - S3AccessKeyCredential -} +import com.scalableminds.webknossos.datastore.services.{DSRemoteWebknossosClient, DataSourceService, ManagedS3Service} +import com.scalableminds.webknossos.datastore.storage.{DataStoreRedisStore, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils import play.api.libs.json.{Json, OFormat, Reads} -import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} -import software.amazon.awssdk.core.checksums.RequestChecksumCalculation -import software.amazon.awssdk.regions.Region -import software.amazon.awssdk.services.s3.S3AsyncClient -import software.amazon.awssdk.transfer.s3.S3TransferManager import software.amazon.awssdk.transfer.s3.model.UploadDirectoryRequest import java.io.{File, RandomAccessFile} @@ -105,6 +94,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, remoteSourceDescriptorService: RemoteSourceDescriptorService, exploreLocalLayerService: ExploreLocalLayerService, dataStoreConfig: DataStoreConfig, + managedS3Service: ManagedS3Service, val remoteWebknossosClient: DSRemoteWebknossosClient)(implicit ec: ExecutionContext) extends DatasetDeleter with DirectoryConstants @@ -426,7 +416,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- deleteFilesNotReferencedInDataSource(unpackedDir, usableDataSourceFromDir) newBasePath <- if (dataStoreConfig.Datastore.S3Upload.enabled) { for { - s3UploadBucket <- s3UploadBucketOpt.toFox + s3UploadBucket <- managedS3Service.s3UploadBucketOpt.toFox _ = logger.info(s"finishUpload for $datasetId: Copying data to s3 bucket $s3UploadBucket...") 
beforeS3Upload = Instant.now s3ObjectKey = s"${dataStoreConfig.Datastore.S3Upload.objectKeyPrefix}/${dataSourceId.organizationId}/${dataSourceId.directoryName}/" @@ -520,56 +510,13 @@ class UploadService @Inject()(dataSourceService: DataSourceService, exploreLocalLayerService.writeLocalDatasourceProperties(dataSource, path)) } yield path - private lazy val s3UploadCredentialsOpt: Option[(String, String)] = - dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => - new CredentialConfigReader(credentialConfig).getCredential - }.collectFirst { - case S3AccessKeyCredential(credentialName, accessKeyId, secretAccessKey, _, _) - if dataStoreConfig.Datastore.S3Upload.credentialName == credentialName => - (accessKeyId, secretAccessKey) - } - - private lazy val s3UploadBucketOpt: Option[String] = - S3DataVault.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName)) - - private lazy val s3UploadEndpoint: URI = { - val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName) - new URI( - "https", - null, - credentialUri.getHost, - -1, - null, - null, - null - ) - } - - private lazy val getS3TransferManager: Box[S3TransferManager] = for { - accessKeyId <- Box(s3UploadCredentialsOpt.map(_._1)) - secretAccessKey <- Box(s3UploadCredentialsOpt.map(_._2)) - client <- tryo( - S3AsyncClient - .builder() - .credentialsProvider(StaticCredentialsProvider.create( - AwsBasicCredentials.builder.accessKeyId(accessKeyId).secretAccessKey(secretAccessKey).build() - )) - .crossRegionAccessEnabled(true) - .forcePathStyle(true) - .endpointOverride(s3UploadEndpoint) - .region(Region.US_EAST_1) - // Disabling checksum calculation prevents files being stored with Content Encoding "aws-chunked". - .requestChecksumCalculation(RequestChecksumCalculation.WHEN_REQUIRED) - .build()) - } yield S3TransferManager.builder().s3Client(client).build() - private def uploadDirectoryToS3( dataDir: Path, bucketName: String, prefix: String ): Fox[Unit] = for { - transferManager <- getS3TransferManager.toFox ?~> "S3 upload is not properly configured, cannot get S3 client" + transferManager <- managedS3Service.transferManagerBox.toFox ?~> "S3 upload is not properly configured, cannot get S3 client" directoryUpload = transferManager.uploadDirectory( UploadDirectoryRequest.builder().bucket(bucketName).s3Prefix(prefix).source(dataDir).build() ) From e520cc9989aab14e9fad5dfd3b6107b3d71b60d7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 11:35:23 +0200 Subject: [PATCH 39/62] implement delete paths on datastore side --- .../dataset/WKRemoteDataStoreClient.scala | 2 +- .../controllers/DataSourceController.scala | 29 ++-- .../datastore/helpers/DatasetDeleter.scala | 1 + .../datastore/helpers/MagLinkInfo.scala | 2 + .../services/DataSourceService.scala | 133 ++++++++++++------ .../conf/datastore.latest.routes | 1 + 6 files changed, 107 insertions(+), 61 deletions(-) diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index 6a5868c5ac6..5a6c1a922e0 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -129,7 +129,7 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: RPC) extends LazyLoggin def deletePaths(paths: Seq[UPath]): Fox[Unit] = for { _ <- rpc(s"${dataStore.url}/data/datasets/deletePaths") // TODO datastore-side - .addQueryString("token" -> RpcTokenHolder.webknossosToken) + .addQueryParam("token", RpcTokenHolder.webknossosToken) 
.deleteJson(paths) } yield () diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 66dbcfddbdd..14b9d7441c6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -407,20 +407,21 @@ class DataSourceController @Inject()( for { dataSource <- datasetCache.getById(datasetId) ~> NOT_FOUND dataSourceId = dataSource.id - _ <- if (dataSourceService.existsOnDisk(dataSourceId)) { - for { - _ <- dataSourceService.deleteOnDisk( - dataSourceId.organizationId, - dataSourceId.directoryName, - Some(datasetId), - reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" - } yield () - } else - for { - _ <- Fox.runIf(dataSourceService.datasetIsInManagedS3(dataSource))( - dataSourceService.deleteFromManagedS3(dataSource, datasetId)) - } yield () - _ <- dsRemoteWebknossosClient.deleteDataset(datasetId) + _ <- dataSourceService.deleteOnDisk( + dataSourceId.organizationId, + dataSourceId.directoryName, + Some(datasetId), + reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" + + } yield Ok + } + } + + def deletePaths(): Action[Seq[UPath]] = + Action.async(validateJson[Seq[UPath]]) { implicit request => + accessTokenService.validateAccessFromTokenContext(UserAccessRequest.webknossos) { + for { + _ <- dataSourceService.deletePathsFromDiskOrManagedS3(request.body) } yield Ok } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala index f953fb77083..968080e2fd1 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala @@ -78,6 +78,7 @@ trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplici // Handle references to layers and mags that are deleted + // TODO remove? private def moveSymlinks(organizationId: String, datasetName: String, datasetId: ObjectId)( implicit ec: ExecutionContext) = for { diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala index c5076828bce..2b07d1a2779 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala @@ -26,3 +26,5 @@ case class LayerMagLinkInfo(layerName: String, magLinkInfos: Seq[MagLinkInfo]) object LayerMagLinkInfo { implicit val jsonFormat: Format[LayerMagLinkInfo] = Json.format[LayerMagLinkInfo] } + +// TODO remove? 
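The DataSourceService changes in the following hunk delete managed-S3 data in two phases: all keys under each path prefix are listed first (ListObjectsV2 returns at most 1000 keys per page, hence the continuation-token recursion), and the collected keys are then removed via DeleteObjects in batches of at most 1000 keys, the per-request limit of that API. A minimal standalone sketch of the same pattern, assuming an already-configured S3AsyncClient and a hypothetical deletePrefix helper, without the Fox error handling used in the actual code:

import scala.concurrent.{ExecutionContext, Future}
import scala.jdk.CollectionConverters._
import scala.jdk.FutureConverters._
import software.amazon.awssdk.services.s3.S3AsyncClient
import software.amazon.awssdk.services.s3.model.{Delete, DeleteObjectsRequest, ListObjectsV2Request, ObjectIdentifier}

// Collect every key below `prefix`, then remove the keys in chunks of at most 1000.
def deletePrefix(s3: S3AsyncClient, bucket: String, prefix: String)(implicit ec: ExecutionContext): Future[Unit] = {
  def listAll(token: Option[String], acc: Seq[String]): Future[Seq[String]] = {
    val base = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000)
    val request = token.fold(base)(t => base.continuationToken(t)).build()
    s3.listObjectsV2(request).asScala.flatMap { response =>
      val keys = acc ++ response.contents().asScala.map(_.key())
      if (response.isTruncated) listAll(Option(response.nextContinuationToken()), keys)
      else Future.successful(keys)
    }
  }
  listAll(None, Nil).flatMap { keys =>
    keys.distinct.grouped(1000).foldLeft(Future.unit) { (previous, batch) =>
      previous.flatMap { _ =>
        val delete = Delete.builder().objects(batch.map(k => ObjectIdentifier.builder().key(k).build()).asJava).build()
        s3.deleteObjects(DeleteObjectsRequest.builder().bucket(bucket).delete(delete).build()).asScala.map(_ => ())
      }
    }
  }
}
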
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index f862cef971f..596d47cc272 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -9,7 +9,13 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingProvider} -import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, MagLinkInfo, UPath} +import com.scalableminds.webknossos.datastore.helpers.{ + DatasetDeleter, + IntervalScheduler, + MagLinkInfo, + PathSchemes, + UPath +} import com.scalableminds.webknossos.datastore.models.datasource._ import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, RemoteSourceDescriptorService} import com.typesafe.scalalogging.LazyLogging @@ -323,6 +329,85 @@ class DataSourceService @Inject()( res } + def deletePathsFromDiskOrManagedS3(paths: Seq[UPath]): Fox[Unit] = { + val localPaths = paths.filter(_.isLocal) + val managedS3Paths = paths.filter(pathIsInManagedS3) + for { + _ <- Fox.serialCombined(localPaths) { + _.toLocalPath.flatMap { + deleteDirectoryRecursively + }.toFox + } + s3PathsByBucket: Map[Option[String], Seq[UPath]] = managedS3Paths.groupBy(bucketForS3UPath) + _ <- Fox.serialCombined(s3PathsByBucket.keys) { bucket: Option[String] => + deleteS3PathsOnBucket(bucket, s3PathsByBucket(bucket)) + } + } yield () + } + + private def deleteS3PathsOnBucket(bucketOpt: Option[String], paths: Seq[UPath]): Fox[Unit] = + for { + bucket <- bucketOpt.toFox + s3Client <- managedS3Service.s3ClientBox.toFox + prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) + keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) + uniqueKeys = keys.distinct + _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket") + _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, bucket, _)).map(_ => ()) + } yield () + + private def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = + if (keys.isEmpty) Fox.empty + else { + Fox.fromFuture( + s3Client + .deleteObjects( + DeleteObjectsRequest + .builder() + .bucket(bucket) + .delete( + Delete + .builder() + .objects( + keys.map(k => ObjectIdentifier.builder().key(k).build()).asJava + ) + .build() + ) + .build() + ) + .asScala) + } + + private def listKeysAtPrefix(s3Client: S3AsyncClient, bucket: String, prefix: String): Fox[Seq[String]] = { + def listRecursive(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { + val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) + val request = continuationToken match { + case Some(token) => builder.continuationToken(token).build() + case None => builder.build() + } + for { + response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala) + keys = response.contents().asScala.map(_.key()) + allKeys = acc ++ keys + result <- if (response.isTruncated) { + listRecursive(Option(response.nextContinuationToken()), allKeys) + } 
else { + Fox.successful(allKeys) + } + } yield result + } + + listRecursive(None, Seq()) + } + + // TODO move to managedS3Service + private def bucketForS3UPath(path: UPath): Option[String] = + S3DataVault.hostBucketFromUri(path.toRemoteUriUnsafe) + + private def pathIsInManagedS3(path: UPath) = + // TODO guard against string prefix false positives + path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => path.toString.startsWith(c.name)) + def datasetIsInManagedS3(dataSource: UsableDataSource): Boolean = { def commonPrefix(strings: Seq[String]): String = { if (strings.isEmpty) return "" @@ -338,50 +423,7 @@ class DataSourceService @Inject()( matchingCredentials.nonEmpty && sharedPath.startsWith("s3") } - def deleteFromManagedS3(dataSource: UsableDataSource, datasetId: ObjectId): Fox[Unit] = { - def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = - if (keys.isEmpty) Fox.empty - else { - Fox.fromFuture( - s3Client - .deleteObjects( - DeleteObjectsRequest - .builder() - .bucket(bucket) - .delete( - Delete - .builder() - .objects( - keys.map(k => ObjectIdentifier.builder().key(k).build()).asJava - ) - .build() - ) - .build() - ) - .asScala) - } - - def listKeysAtPrefix(s3Client: S3AsyncClient, bucket: String, prefix: String): Fox[Seq[String]] = { - def listRec(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { - val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) - val request = continuationToken match { - case Some(token) => builder.continuationToken(token).build() - case None => builder.build() - } - for { - response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala) - keys = response.contents().asScala.map(_.key()) - allKeys = acc ++ keys - result <- if (response.isTruncated) { - listRec(Option(response.nextContinuationToken()), allKeys) - } else { - Fox.successful(allKeys) - } - } yield result - } - listRec(None, Seq()) - } - + def deleteFromManagedS3(dataSource: UsableDataSource, datasetId: ObjectId): Fox[Unit] = for { _ <- Fox.successful(()) layersAndLinkedMags <- remoteWebknossosClient.fetchPaths(datasetId) @@ -407,5 +449,4 @@ class DataSourceService @Inject()( } yield () }) } yield () - } } diff --git a/webknossos-datastore/conf/datastore.latest.routes b/webknossos-datastore/conf/datastore.latest.routes index 605e9f5e3e7..d93b7ee0399 100644 --- a/webknossos-datastore/conf/datastore.latest.routes +++ b/webknossos-datastore/conf/datastore.latest.routes @@ -110,6 +110,7 @@ POST /datasets/cancelUpload POST /datasets/measureUsedStorage/:organizationId @com.scalableminds.webknossos.datastore.controllers.DataSourceController.measureUsedStorage(organizationId: String) PUT /datasets/:datasetId @com.scalableminds.webknossos.datastore.controllers.DataSourceController.updateOnDisk(datasetId: ObjectId) DELETE /datasets/:datasetId/deleteOnDisk @com.scalableminds.webknossos.datastore.controllers.DataSourceController.deleteOnDisk(datasetId: ObjectId) +DELETE /datasets/deletePaths @com.scalableminds.webknossos.datastore.controllers.DataSourceController.deletePaths() POST /datasets/exploreRemote @com.scalableminds.webknossos.datastore.controllers.DataSourceController.exploreRemoteDataset() POST /datasets/validatePaths @com.scalableminds.webknossos.datastore.controllers.DataSourceController.validatePaths() From e704481cf8ec5fbb0bf6c582e67fc2ee360bd11d Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 11:46:44 +0200 Subject: [PATCH 40/62] cleanup 
--- .../services/DataSourceService.scala | 143 +----------------- .../datastore/services/ManagedS3Service.scala | 100 +++++++++++- 2 files changed, 101 insertions(+), 142 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index 596d47cc272..96fad9a85e5 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -5,41 +5,22 @@ import com.google.inject.Inject import com.google.inject.name.Named import com.scalableminds.util.io.PathUtils import com.scalableminds.util.mvc.Formatter -import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.dataformats.{MagLocator, MappingProvider} -import com.scalableminds.webknossos.datastore.helpers.{ - DatasetDeleter, - IntervalScheduler, - MagLinkInfo, - PathSchemes, - UPath -} +import com.scalableminds.webknossos.datastore.helpers.{DatasetDeleter, IntervalScheduler, UPath} import com.scalableminds.webknossos.datastore.models.datasource._ -import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, RemoteSourceDescriptorService} +import com.scalableminds.webknossos.datastore.storage.RemoteSourceDescriptorService import com.typesafe.scalalogging.LazyLogging import com.scalableminds.util.tools.Box.tryo import com.scalableminds.util.tools._ -import com.scalableminds.webknossos.datastore.datavault.S3DataVault import play.api.inject.ApplicationLifecycle import play.api.libs.json.Json -import software.amazon.awssdk.services.s3.S3AsyncClient -import software.amazon.awssdk.services.s3.model.{ - Delete, - DeleteObjectsRequest, - DeleteObjectsResponse, - ListObjectsV2Request, - ObjectIdentifier -} import java.io.File -import java.net.URI import java.nio.file.{Files, Path} import scala.concurrent.ExecutionContext import scala.concurrent.duration._ -import scala.jdk.CollectionConverters._ -import scala.jdk.FutureConverters._ class DataSourceService @Inject()( config: DataStoreConfig, @@ -320,133 +301,17 @@ class DataSourceService @Inject()( } yield dataLayer.mags.length } yield removedEntriesList.sum - // TODO move to ManagedS3Service - private lazy val globalCredentials = { - val res = config.Datastore.DataVaults.credentials.flatMap { credentialConfig => - new CredentialConfigReader(credentialConfig).getCredential - } - logger.info(s"Parsed ${res.length} global data vault credentials from datastore config.") - res - } - def deletePathsFromDiskOrManagedS3(paths: Seq[UPath]): Fox[Unit] = { val localPaths = paths.filter(_.isLocal) - val managedS3Paths = paths.filter(pathIsInManagedS3) + val managedS3Paths = paths.filter(managedS3Service.pathIsInManagedS3) for { _ <- Fox.serialCombined(localPaths) { _.toLocalPath.flatMap { deleteDirectoryRecursively }.toFox } - s3PathsByBucket: Map[Option[String], Seq[UPath]] = managedS3Paths.groupBy(bucketForS3UPath) - _ <- Fox.serialCombined(s3PathsByBucket.keys) { bucket: Option[String] => - deleteS3PathsOnBucket(bucket, s3PathsByBucket(bucket)) - } + _ <- managedS3Service.deletePaths(managedS3Paths) } yield () } - private def deleteS3PathsOnBucket(bucketOpt: Option[String], paths: Seq[UPath]): Fox[Unit] = - for { - bucket <- 
bucketOpt.toFox - s3Client <- managedS3Service.s3ClientBox.toFox - prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) - keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) - uniqueKeys = keys.distinct - _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket") - _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, bucket, _)).map(_ => ()) - } yield () - - private def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String]): Fox[DeleteObjectsResponse] = - if (keys.isEmpty) Fox.empty - else { - Fox.fromFuture( - s3Client - .deleteObjects( - DeleteObjectsRequest - .builder() - .bucket(bucket) - .delete( - Delete - .builder() - .objects( - keys.map(k => ObjectIdentifier.builder().key(k).build()).asJava - ) - .build() - ) - .build() - ) - .asScala) - } - - private def listKeysAtPrefix(s3Client: S3AsyncClient, bucket: String, prefix: String): Fox[Seq[String]] = { - def listRecursive(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { - val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) - val request = continuationToken match { - case Some(token) => builder.continuationToken(token).build() - case None => builder.build() - } - for { - response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala) - keys = response.contents().asScala.map(_.key()) - allKeys = acc ++ keys - result <- if (response.isTruncated) { - listRecursive(Option(response.nextContinuationToken()), allKeys) - } else { - Fox.successful(allKeys) - } - } yield result - } - - listRecursive(None, Seq()) - } - - // TODO move to managedS3Service - private def bucketForS3UPath(path: UPath): Option[String] = - S3DataVault.hostBucketFromUri(path.toRemoteUriUnsafe) - - private def pathIsInManagedS3(path: UPath) = - // TODO guard against string prefix false positives - path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => path.toString.startsWith(c.name)) - - def datasetIsInManagedS3(dataSource: UsableDataSource): Boolean = { - def commonPrefix(strings: Seq[String]): String = { - if (strings.isEmpty) return "" - - strings.reduce { (a, b) => - a.zip(b).takeWhile { case (c1, c2) => c1 == c2 }.map(_._1).mkString - } - } - - val allPaths = dataSource.allExplicitPaths - val sharedPath = commonPrefix(allPaths.map(_.toString)) - val matchingCredentials = globalCredentials.filter(c => sharedPath.startsWith(c.name)) - matchingCredentials.nonEmpty && sharedPath.startsWith("s3") - } - - def deleteFromManagedS3(dataSource: UsableDataSource, datasetId: ObjectId): Fox[Unit] = - for { - _ <- Fox.successful(()) - layersAndLinkedMags <- remoteWebknossosClient.fetchPaths(datasetId) - s3Client <- managedS3Service.s3ClientBox.toFox - magsLinkedByOtherDatasets: Set[MagLinkInfo] = layersAndLinkedMags - .flatMap(layerInfo => layerInfo.magLinkInfos.filter(_.linkedMags.nonEmpty)) - .toSet - linkedMagPaths = magsLinkedByOtherDatasets.flatMap(_.linkedMags).flatMap(_.path) - paths = dataSource.allExplicitPaths.filterNot(path => linkedMagPaths.contains(path.toString)) - _ <- Fox.runIf(paths.nonEmpty)({ - for { - // Assume everything is in the same bucket - firstPath <- paths.headOption.toFox - bucket <- S3DataVault - .hostBucketFromUri(new URI(firstPath.toString)) - .toFox ?~> s"Could not determine S3 bucket from path $firstPath" - prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(new 
URI(path.toString)).toFox)) - keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) - uniqueKeys = keys.distinct - _ = logger.info( - s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket for dataset ${dataSource.id}") - _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, bucket, _)).map(_ => ()) - } yield () - }) - } yield () } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala index 52e0ecfe608..11a9343ce5d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala @@ -1,20 +1,32 @@ package com.scalableminds.webknossos.datastore.services -import com.scalableminds.util.tools.Box +import com.scalableminds.util.tools.{Box, Fox, FoxImplicits} import com.scalableminds.util.tools.Box.tryo import com.scalableminds.webknossos.datastore.DataStoreConfig import com.scalableminds.webknossos.datastore.datavault.S3DataVault +import com.scalableminds.webknossos.datastore.helpers.{PathSchemes, UPath} import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, S3AccessKeyCredential} +import com.typesafe.scalalogging.LazyLogging import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} import software.amazon.awssdk.core.checksums.RequestChecksumCalculation import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.S3AsyncClient +import software.amazon.awssdk.services.s3.model.{ + Delete, + DeleteObjectsRequest, + DeleteObjectsResponse, + ListObjectsV2Request, + ObjectIdentifier +} import software.amazon.awssdk.transfer.s3.S3TransferManager import java.net.URI import javax.inject.Inject +import scala.concurrent.ExecutionContext +import scala.jdk.CollectionConverters._ +import scala.jdk.FutureConverters._ -class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) { +class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxImplicits with LazyLogging { private lazy val s3UploadCredentialsOpt: Option[(String, String)] = dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => @@ -41,7 +53,7 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) { ) } - lazy val s3ClientBox: Box[S3AsyncClient] = for { + private lazy val s3ClientBox: Box[S3AsyncClient] = for { accessKeyId <- Box(s3UploadCredentialsOpt.map(_._1)) secretAccessKey <- Box(s3UploadCredentialsOpt.map(_._2)) client <- tryo( @@ -63,4 +75,86 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) { client <- s3ClientBox } yield S3TransferManager.builder().s3Client(client).build() + def deletePaths(paths: Seq[UPath])(implicit ec: ExecutionContext): Fox[Unit] = { + val pathsByBucket: Map[Option[String], Seq[UPath]] = paths.groupBy(bucketForS3UPath) + for { + _ <- Fox.serialCombined(pathsByBucket.keys) { bucket: Option[String] => + deleteS3PathsOnBucket(bucket, pathsByBucket(bucket)) + } + } yield () + } + + private def deleteS3PathsOnBucket(bucketOpt: Option[String], paths: Seq[UPath])( + implicit ec: ExecutionContext): Fox[Unit] = + for { + bucket <- bucketOpt.toFox ?~> "Could not determine S3 bucket from UPath" + s3Client <- s3ClientBox.toFox ?~> "No managed s3 client configured" + prefixes <- 
Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) + keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) + uniqueKeys = keys.distinct + _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket") + _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, bucket, _)).map(_ => ()) + } yield () + + private def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String])( + implicit ec: ExecutionContext): Fox[DeleteObjectsResponse] = + if (keys.isEmpty) Fox.empty + else { + Fox.fromFuture( + s3Client + .deleteObjects( + DeleteObjectsRequest + .builder() + .bucket(bucket) + .delete( + Delete + .builder() + .objects( + keys.map(k => ObjectIdentifier.builder().key(k).build()).asJava + ) + .build() + ) + .build() + ) + .asScala) + } + + private def listKeysAtPrefix(s3Client: S3AsyncClient, bucket: String, prefix: String)( + implicit ec: ExecutionContext): Fox[Seq[String]] = { + def listRecursive(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = { + val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000) + val request = continuationToken match { + case Some(token) => builder.continuationToken(token).build() + case None => builder.build() + } + for { + response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala) + keys = response.contents().asScala.map(_.key()) + allKeys = acc ++ keys + result <- if (response.isTruncated) { + listRecursive(Option(response.nextContinuationToken()), allKeys) + } else { + Fox.successful(allKeys) + } + } yield result + } + + listRecursive(None, Seq()) + } + + private lazy val globalCredentials = { + val res = dataStoreConfig.Datastore.DataVaults.credentials.flatMap { credentialConfig => + new CredentialConfigReader(credentialConfig).getCredential + } + logger.info(s"Parsed ${res.length} global data vault credentials from datastore config.") + res + } + + private def bucketForS3UPath(path: UPath): Option[String] = + S3DataVault.hostBucketFromUri(path.toRemoteUriUnsafe) + + def pathIsInManagedS3(path: UPath): Boolean = + // TODO guard against string prefix false positives + path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => path.toString.startsWith(c.name)) + } From 834e6e425c0cd1e26499a01d9e1a56e8cce96462 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 11:56:37 +0200 Subject: [PATCH 41/62] remove dataset also from postgres --- .../WKRemoteDataStoreController.scala | 12 +------ app/models/dataset/DatasetService.scala | 32 ++++++++++++------- .../dataset/WKRemoteDataStoreClient.scala | 2 +- .../com/scalableminds/util/tools/Fox.scala | 2 -- .../controllers/DataSourceController.scala | 2 +- 5 files changed, 24 insertions(+), 26 deletions(-) diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index e5fa4a2fc79..86d53cb95a1 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -211,17 +211,7 @@ class WKRemoteDataStoreController @Inject()( implicit request => dataStoreService.validateAccess(name, key) { _ => for { - existingDatasetBox <- datasetDAO.findOne(request.body)(GlobalAccessContext).shiftBox - _ <- existingDatasetBox match { - case Full(dataset) => - for { - annotationCount <- annotationDAO.countAllByDataset(dataset._id)(GlobalAccessContext) - _ = datasetDAO - 
.deleteDataset(dataset._id, onlyMarkAsDeleted = annotationCount > 0) - .flatMap(_ => usedStorageService.refreshStorageReportForDataset(dataset)) - } yield () - case _ => Fox.successful(()) - } + _ <- datasetService.deleteDatasetFromDB(request.body)(GlobalAccessContext) } yield Ok } } diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 036547a4e3c..9594c9681e9 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -5,16 +5,7 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Empty, EmptyBox, Fox, FoxImplicits, Full, JsonHelper, TextUtils} import com.scalableminds.webknossos.datastore.helpers.{DataSourceMagInfo, UPath} -import com.scalableminds.webknossos.datastore.models.datasource.{ - DataSource, - DataSourceId, - DataSourceStatus, - StaticColorLayer, - StaticLayer, - StaticSegmentationLayer, - UnusableDataSource, - UsableDataSource -} +import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId, DataSourceStatus, StaticColorLayer, StaticLayer, StaticSegmentationLayer, UnusableDataSource, UsableDataSource} import com.scalableminds.webknossos.datastore.rpc.RPC import com.scalableminds.webknossos.datastore.services.DataSourcePathInfo import com.typesafe.scalalogging.LazyLogging @@ -25,6 +16,8 @@ import models.user.{MultiUserDAO, User, UserService} import com.scalableminds.webknossos.datastore.controllers.PathValidationResult import mail.{MailchimpClient, MailchimpTag} import models.analytics.{AnalyticsService, UploadDatasetEvent} +import models.annotation.AnnotationDAO +import models.storage.UsedStorageService import play.api.http.Status.NOT_FOUND import play.api.i18n.{Messages, MessagesProvider} import play.api.libs.json.{JsObject, Json} @@ -53,6 +46,8 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, teamService: TeamService, thumbnailCachingService: ThumbnailCachingService, userService: UserService, + annotationDAO: AnnotationDAO, + usedStorageService: UsedStorageService, conf: WkConf, rpc: RPC)(implicit ec: ExecutionContext) extends FoxImplicits @@ -562,10 +557,25 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" } yield () } - _ <- Fox.failure("mark as deleted in the db!") + _ <- deleteDatasetFromDB(dataset._id) } yield () } + def deleteDatasetFromDB(datasetId: ObjectId)(implicit ctx: DBAccessContext): Fox[Unit] = + for { + existingDatasetBox <- datasetDAO.findOne(datasetId)(GlobalAccessContext).shiftBox + _ <- existingDatasetBox match { + case Full(dataset) => + for { + annotationCount <- annotationDAO.countAllByDataset(dataset._id)(GlobalAccessContext) + _ = datasetDAO + .deleteDataset(dataset._id, onlyMarkAsDeleted = annotationCount > 0) + .flatMap(_ => usedStorageService.refreshStorageReportForDataset(dataset)) + } yield () + case _ => Fox.successful(()) + } + } yield () + def generateDirectoryName(datasetName: String, datasetId: ObjectId): String = TextUtils.normalizeStrong(datasetName) match { case Some(prefix) => s"$prefix-$datasetId" diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index 5a6c1a922e0..4398bac0211 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -128,7 +128,7 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: 
RPC) extends LazyLoggin def deletePaths(paths: Seq[UPath]): Fox[Unit] = for { - _ <- rpc(s"${dataStore.url}/data/datasets/deletePaths") // TODO datastore-side + _ <- rpc(s"${dataStore.url}/data/datasets/deletePaths") .addQueryParam("token", RpcTokenHolder.webknossosToken) .deleteJson(paths) } yield () diff --git a/util/src/main/scala/com/scalableminds/util/tools/Fox.scala b/util/src/main/scala/com/scalableminds/util/tools/Fox.scala index 7037dc97ca5..f911b4a841f 100644 --- a/util/src/main/scala/com/scalableminds/util/tools/Fox.scala +++ b/util/src/main/scala/com/scalableminds/util/tools/Fox.scala @@ -1,7 +1,5 @@ package com.scalableminds.util.tools -import com.scalableminds.util.tools.{Box, Empty, Failure, Full, ParamFailure} - import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future} import scala.util.{Success, Try} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 14b9d7441c6..93d73f4d713 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -405,7 +405,7 @@ class DataSourceController @Inject()( Action.async { implicit request => accessTokenService.validateAccessFromTokenContext(UserAccessRequest.webknossos) { for { - dataSource <- datasetCache.getById(datasetId) ~> NOT_FOUND + dataSource <- dsRemoteWebknossosClient.getDataSource(datasetId) ~> NOT_FOUND dataSourceId = dataSource.id _ <- dataSourceService.deleteOnDisk( dataSourceId.organizationId, From 96c43fef58ed6ae5382d6433d874494fa6a52929 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 12:03:20 +0200 Subject: [PATCH 42/62] cleanup --- app/controllers/WKRemoteDataStoreController.scala | 6 ++---- app/models/dataset/DatasetService.scala | 13 +++++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala index 86d53cb95a1..efd00326fcc 100644 --- a/app/controllers/WKRemoteDataStoreController.scala +++ b/app/controllers/WKRemoteDataStoreController.scala @@ -3,7 +3,7 @@ package controllers import com.scalableminds.util.accesscontext.{AuthorizedAccessContext, DBAccessContext, GlobalAccessContext} import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant -import com.scalableminds.util.tools.{Fox, Full} +import com.scalableminds.util.tools.Fox import com.scalableminds.webknossos.datastore.controllers.JobExportProperties import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo} import com.scalableminds.webknossos.datastore.models.UnfinishedUpload @@ -20,7 +20,6 @@ import com.scalableminds.webknossos.datastore.services.uploading.{ ReserveUploadInformation } import com.typesafe.scalalogging.LazyLogging -import models.annotation.AnnotationDAO import models.dataset._ import models.dataset.credential.CredentialDAO import models.job.JobDAO @@ -50,7 +49,6 @@ class WKRemoteDataStoreController @Inject()( teamDAO: TeamDAO, jobDAO: JobDAO, credentialDAO: CredentialDAO, - annotationDAO: AnnotationDAO, wkSilhouetteEnvironment: WkSilhouetteEnvironment)(implicit ec: ExecutionContext, bodyParsers: PlayBodyParsers) extends Controller with LazyLogging { @@ -211,7 +209,7 @@ class 
WKRemoteDataStoreController @Inject()( implicit request => dataStoreService.validateAccess(name, key) { _ => for { - _ <- datasetService.deleteDatasetFromDB(request.body)(GlobalAccessContext) + _ <- datasetService.deleteDatasetFromDB(request.body) } yield Ok } } diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 9594c9681e9..fa068df5675 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -5,7 +5,16 @@ import com.scalableminds.util.objectid.ObjectId import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Empty, EmptyBox, Fox, FoxImplicits, Full, JsonHelper, TextUtils} import com.scalableminds.webknossos.datastore.helpers.{DataSourceMagInfo, UPath} -import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId, DataSourceStatus, StaticColorLayer, StaticLayer, StaticSegmentationLayer, UnusableDataSource, UsableDataSource} +import com.scalableminds.webknossos.datastore.models.datasource.{ + DataSource, + DataSourceId, + DataSourceStatus, + StaticColorLayer, + StaticLayer, + StaticSegmentationLayer, + UnusableDataSource, + UsableDataSource +} import com.scalableminds.webknossos.datastore.rpc.RPC import com.scalableminds.webknossos.datastore.services.DataSourcePathInfo import com.typesafe.scalalogging.LazyLogging @@ -561,7 +570,7 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, } yield () } - def deleteDatasetFromDB(datasetId: ObjectId)(implicit ctx: DBAccessContext): Fox[Unit] = + def deleteDatasetFromDB(datasetId: ObjectId): Fox[Unit] = for { existingDatasetBox <- datasetDAO.findOne(datasetId)(GlobalAccessContext).shiftBox _ <- existingDatasetBox match { From c3ac8c3866400fa93a02c7142b5bb69ee6383000 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 13:21:19 +0200 Subject: [PATCH 43/62] fix circular dependency --- app/models/storage/UsedStorageService.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/models/storage/UsedStorageService.scala b/app/models/storage/UsedStorageService.scala index bdbefbfbc5b..d4a4f9db436 100644 --- a/app/models/storage/UsedStorageService.scala +++ b/app/models/storage/UsedStorageService.scala @@ -16,7 +16,6 @@ import models.dataset.{ Dataset, DatasetLayerAttachmentsDAO, DatasetMagsDAO, - DatasetService, StorageRelevantDataLayerAttachment, WKRemoteDataStoreClient } @@ -34,7 +33,6 @@ import scala.concurrent.duration._ class UsedStorageService @Inject()(val actorSystem: ActorSystem, val lifecycle: ApplicationLifecycle, organizationDAO: OrganizationDAO, - datasetService: DatasetService, dataStoreDAO: DataStoreDAO, datasetMagDAO: DatasetMagsDAO, datasetLayerAttachmentsDAO: DatasetLayerAttachmentsDAO, @@ -213,7 +211,7 @@ class UsedStorageService @Inject()(val actorSystem: ActorSystem, def refreshStorageReportForDataset(dataset: Dataset): Fox[Unit] = for { _ <- Fox.successful(()) - dataStore <- datasetService.dataStoreFor(dataset) + dataStore <- dataStoreDAO.findOneByName(dataset._dataStore.trim) ?~> "datastore.notFound" _ <- if (dataStore.reportUsedStorageEnabled) { for { organization <- organizationDAO.findOne(dataset._organization) From 7335a0f977e1edb58674dd858e02f9b2394eaaa7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Wed, 15 Oct 2025 13:36:20 +0200 Subject: [PATCH 44/62] adapt frontend (changed api route) --- app/models/dataset/DatasetService.scala | 2 +- frontend/javascripts/admin/rest_api.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff 
--git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index fa068df5675..c248a5dfb40 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -530,7 +530,7 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, }) } yield () - def deleteUnusableDataset(dataset: Dataset): Fox[Unit] = ??? // TODO + private def deleteUnusableDataset(dataset: Dataset): Fox[Unit] = ??? // TODO def deleteDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = if (!dataset.isUsable) { diff --git a/frontend/javascripts/admin/rest_api.ts b/frontend/javascripts/admin/rest_api.ts index 35342c75d03..00505f713fb 100644 --- a/frontend/javascripts/admin/rest_api.ts +++ b/frontend/javascripts/admin/rest_api.ts @@ -1379,7 +1379,7 @@ export async function triggerDatasetClearCache( } export async function deleteDatasetOnDisk(datasetId: string): Promise { - await Request.triggerRequest(`/api/datasets/${datasetId}/deleteOnDisk`, { + await Request.triggerRequest(`/api/datasets/${datasetId}`, { method: "DELETE", }); } From 1a032b2c18bbc44f303490f23f34df42f34755ea Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 16 Oct 2025 11:03:48 +0200 Subject: [PATCH 45/62] WIP find unique paths --- app/models/dataset/Dataset.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 88a94b11f4c..d2319631bf8 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -896,7 +896,20 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte magInfos = rowsToMagInfos(rows) } yield magInfos - def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] = ??? // TODO + def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] = + for { + pathsStr <- run(q""" + SELECT m1.path FROM webknossos.dataset_mags m1 + WHERE m1._dataset = $datasetId + AND NOT EXISTS ( + SELECT m2.path + FROM webknossos.dataset_mags m2 + WHERE m2._dataset != $datasetId + AND m2.path = m1.path + ) + """.as[String]) + paths <- Fox.serialCombined(pathsStr)(UPath.fromString(_).toFox) // TODO box variant? + } yield paths private def parseMagLocator(row: DatasetMagsRow): Fox[MagLocator] = for { From a68404444ad929e3caa813ee6d5f7fcea45adf92 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 16 Oct 2025 11:23:15 +0200 Subject: [PATCH 46/62] also attachments --- app/models/dataset/Dataset.scala | 17 +++++++++++++++-- app/models/dataset/DatasetService.scala | 4 +++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index d2319631bf8..75484923f43 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -908,7 +908,7 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte AND m2.path = m1.path ) """.as[String]) - paths <- Fox.serialCombined(pathsStr)(UPath.fromString(_).toFox) // TODO box variant? + paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox } yield paths private def parseMagLocator(row: DatasetMagsRow): Fox[MagLocator] = @@ -1281,7 +1281,20 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex """.as[StorageRelevantDataLayerAttachment]) } yield storageRelevantAttachments.toList - def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] = ??? 
// TODO + def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] = + for { + pathsStr <- run(q""" + SELECT a1.path FROM webknossos.dataset_layer_attachments a1 + WHERE a1._dataset = $datasetId + AND NOT EXISTS ( + SELECT a2.path + FROM webknossos.dataset_layer_attachments a2 + WHERE a2._dataset != $datasetId + AND a2.path = a1.path + ) + """.as[String]) + paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox + } yield paths } class DatasetCoordinateTransformationsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionContext) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index c248a5dfb40..8af1d8c93ce 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -558,11 +558,13 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findPathsUsedOnlyByThisDataset( dataset._id) pathsUsedOnlyByThisDataset = magPathsUsedOnlyByThisDataset ++ attachmentPathsUsedOnlyByThisDataset + // Note that the datastore only deletes local paths and paths on our managed S3 cloud storage _ <- datastoreClient.deletePaths(pathsUsedOnlyByThisDataset) } yield () } else { for { - _ <- Fox.failure("checks!") + _ <- Fox.failure( + "check that no other dataset’s realpaths are in here (check only datasets from the same datastore)! TODO how to find datastore binaryData dir? paths may be absolute.") _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" } yield () } From aa5c99409b9dce8c84fad7c7c15f295d2633c78f Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 23 Oct 2025 09:50:14 +0200 Subject: [PATCH 47/62] wip also handle non-virtual datasets --- app/models/dataset/Dataset.scala | 10 +++++++++- app/models/dataset/DatasetService.scala | 15 +++++++++++++-- app/models/dataset/WKRemoteDataStoreClient.scala | 6 ++++++ conf/application.conf | 4 ++-- .../controllers/DataSourceController.scala | 6 ++++++ webknossos-datastore/conf/datastore.latest.routes | 2 +- 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 75484923f43..fc649621515 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -905,12 +905,18 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte SELECT m2.path FROM webknossos.dataset_mags m2 WHERE m2._dataset != $datasetId - AND m2.path = m1.path + AND ( + m2.path = m1.path + OR + m2.realpath = m1.realpath + ) ) """.as[String]) paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox } yield paths + def findDatasetsWithMagsInDir(absolutePath: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = ??? // TODO + private def parseMagLocator(row: DatasetMagsRow): Fox[MagLocator] = for { mag <- parseMag(row.mag) @@ -1295,6 +1301,8 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex """.as[String]) paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox } yield paths + + def findDatasetsWithMagsInDir(absolutePath: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = ??? 
// TODO } class DatasetCoordinateTransformationsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionContext) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 8af1d8c93ce..d929378f3e5 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -563,8 +563,13 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, } yield () } else { for { - _ <- Fox.failure( - "check that no other dataset’s realpaths are in here (check only datasets from the same datastore)! TODO how to find datastore binaryData dir? paths may be absolute.") + datastoreBaseDirStr <- datastoreClient.getBaseDirAbsolute + datastoreBaseDir <- UPath.fromString(datastoreBaseDirStr).toFox + datasetDir = datastoreBaseDir / dataset._organization / dataset.directoryName + datastore <- dataStoreFor(dataset) + datasetsPointingInHere <- findDatasetsUsingDataFromDir(datasetDir, datastore) + _ <- Fox.fromBool(datasetsPointingInHere.isEmpty) ?~> s"Cannot delete dataset because ${datasetsPointingInHere.length} other datasets reference its data: ${datasetsPointingInHere + .mkString(",")}" _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" } yield () } @@ -572,6 +577,12 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, } yield () } + private def findDatasetsUsingDataFromDir(directory: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = + for { + datasetsWithMagsInDir <- datasetMagsDAO.findDatasetsWithMagsInDir(directory, dataStore) + datasetsWithAttachmentsInDir <- datasetLayerAttachmentsDAO.findDatasetsWithMagsInDir(directory, dataStore) + } yield (datasetsWithMagsInDir ++ datasetsWithAttachmentsInDir).distinct + def deleteDatasetFromDB(datasetId: ObjectId): Fox[Unit] = for { existingDatasetBox <- datasetDAO.findOne(datasetId)(GlobalAccessContext).shiftBox diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index 4398bac0211..2776acf93b2 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -126,6 +126,12 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: RPC) extends LazyLoggin .delete() } yield () + // TODO cache? 
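+  // Returns the datastore's absolute on-disk data base directory, so webknossos can resolve a
+  // dataset's full path before checking whether other datasets reference data inside it.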
+ def getBaseDirAbsolute: Fox[String] = + rpc(s"${dataStore.url}/data/baseDirAbsolute") + .addQueryParam("token", RpcTokenHolder.webknossosToken) + .getWithJsonResponse[String] + def deletePaths(paths: Seq[UPath]): Fox[Unit] = for { _ <- rpc(s"${dataStore.url}/data/datasets/deletePaths") diff --git a/conf/application.conf b/conf/application.conf index e62609240da..53aa20790c9 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -156,8 +156,8 @@ features { taskReopenAllowedInSeconds = 30 allowDeleteDatasets = true # to enable jobs for local development, use "yarn enable-jobs" to also activate it in the database - jobsEnabled = false - voxelyticsEnabled = false + jobsEnabled = true + voxelyticsEnabled = true neuronInferralCostPerGVx = 1 mitochondriaInferralCostPerGVx = 0.5 alignmentCostPerGVx = 0.5 diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index 93d73f4d713..dd8aa5c3da0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -87,6 +87,12 @@ class DataSourceController @Inject()( override def allowRemoteOrigin: Boolean = true + def baseDirAbsolute: Action[AnyContent] = Action.async { implicit request => + accessTokenService.validateAccessFromTokenContext(UserAccessRequest.webknossos) { + Fox.successful(Ok(Json.toJson(dataSourceService.dataBaseDir.toAbsolutePath.toString))) + } + } + def triggerInboxCheckBlocking(organizationId: Option[String]): Action[AnyContent] = Action.async { implicit request => accessTokenService.validateAccessFromTokenContext( organizationId diff --git a/webknossos-datastore/conf/datastore.latest.routes b/webknossos-datastore/conf/datastore.latest.routes index d93b7ee0399..74b1a25d7c9 100644 --- a/webknossos-datastore/conf/datastore.latest.routes +++ b/webknossos-datastore/conf/datastore.latest.routes @@ -104,6 +104,7 @@ POST /datasets/:datasetId/layers/:dataLayerName/segmentStatistics/surfa GET /datasets @com.scalableminds.webknossos.datastore.controllers.DataSourceController.testChunk(resumableChunkNumber: Int, resumableIdentifier: String) POST /datasets @com.scalableminds.webknossos.datastore.controllers.DataSourceController.uploadChunk() GET /datasets/getUnfinishedUploads @com.scalableminds.webknossos.datastore.controllers.DataSourceController.getUnfinishedUploads(organizationName: String) +GET /datasets/baseDirAbsolute @com.scalableminds.webknossos.datastore.controllers.DataSourceController.baseDirAbsolute POST /datasets/reserveUpload @com.scalableminds.webknossos.datastore.controllers.DataSourceController.reserveUpload() POST /datasets/finishUpload @com.scalableminds.webknossos.datastore.controllers.DataSourceController.finishUpload() POST /datasets/cancelUpload @com.scalableminds.webknossos.datastore.controllers.DataSourceController.cancelUpload() @@ -113,7 +114,6 @@ DELETE /datasets/:datasetId/deleteOnDisk DELETE /datasets/deletePaths @com.scalableminds.webknossos.datastore.controllers.DataSourceController.deletePaths() POST /datasets/exploreRemote @com.scalableminds.webknossos.datastore.controllers.DataSourceController.exploreRemoteDataset() POST /datasets/validatePaths @com.scalableminds.webknossos.datastore.controllers.DataSourceController.validatePaths() - DELETE /datasets/:datasetId 
@com.scalableminds.webknossos.datastore.controllers.DataSourceController.invalidateCache(datasetId: ObjectId) # Actions From 84c49b2ec53ea9a0a503c1bf03ac418ae2edae95 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 23 Oct 2025 10:07:01 +0200 Subject: [PATCH 48/62] check other datasets using deletee --- app/models/dataset/Dataset.scala | 30 +++++++++++++++++++++++-- app/models/dataset/DatasetService.scala | 14 +++++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index fc649621515..ada64f0f74a 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -915,7 +915,20 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox } yield paths - def findDatasetsWithMagsInDir(absolutePath: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = ??? // TODO + def findDatasetsWithMagsInDir(absolutePath: UPath, + dataStore: DataStore, + ignoredDataset: ObjectId): Fox[Seq[ObjectId]] = { + // ensure trailing slash on absolutePath to avoid string prefix false positives + val absolutePathWithTrailingSlash = + if (absolutePath.toString.endsWith("/")) absolutePath.toString else absolutePath.toString + "/" + run(q""" + SELECT d._id FROM webknossos.dataset_mags m + JOIN webknossos.datasets d ON m._dataset = d._id + WHERE starts_with(m.realpath, $absolutePathWithTrailingSlash) + AND d._id != $ignoredDataset + AND d.dataStore = ${dataStore.name.trim} + """.as[ObjectId]) + } private def parseMagLocator(row: DatasetMagsRow): Fox[MagLocator] = for { @@ -1302,7 +1315,20 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox } yield paths - def findDatasetsWithMagsInDir(absolutePath: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = ??? 
// TODO + def findDatasetsWithAttachmentsInDir(absolutePath: UPath, + dataStore: DataStore, + ignoredDataset: ObjectId): Fox[Seq[ObjectId]] = { + // ensure trailing slash on absolutePath to avoid string prefix false positives + val absolutePathWithTrailingSlash = + if (absolutePath.toString.endsWith("/")) absolutePath.toString else absolutePath.toString + "/" + run(q""" + SELECT d._id FROM webknossos.dataset_layer_attachments a + JOIN webknossos.datasets d ON a._dataset = d._id + WHERE starts_with(a.path, $absolutePathWithTrailingSlash) + AND d._id != $ignoredDataset + AND d.dataStore = ${dataStore.name.trim} + """.as[ObjectId]) + } } class DatasetCoordinateTransformationsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionContext) diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index d929378f3e5..22949cb9e1a 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -567,8 +567,8 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, datastoreBaseDir <- UPath.fromString(datastoreBaseDirStr).toFox datasetDir = datastoreBaseDir / dataset._organization / dataset.directoryName datastore <- dataStoreFor(dataset) - datasetsPointingInHere <- findDatasetsUsingDataFromDir(datasetDir, datastore) - _ <- Fox.fromBool(datasetsPointingInHere.isEmpty) ?~> s"Cannot delete dataset because ${datasetsPointingInHere.length} other datasets reference its data: ${datasetsPointingInHere + datasetsUsingDataFromThisDir <- findDatasetsUsingDataFromDir(datasetDir, datastore, dataset._id) + _ <- Fox.fromBool(datasetsUsingDataFromThisDir.isEmpty) ?~> s"Cannot delete dataset because ${datasetsUsingDataFromThisDir.length} other datasets reference its data: ${datasetsUsingDataFromThisDir .mkString(",")}" _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" } yield () @@ -577,10 +577,14 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, } yield () } - private def findDatasetsUsingDataFromDir(directory: UPath, dataStore: DataStore): Fox[Seq[ObjectId]] = + private def findDatasetsUsingDataFromDir(directory: UPath, + dataStore: DataStore, + ignoredDatasetId: ObjectId): Fox[Seq[ObjectId]] = for { - datasetsWithMagsInDir <- datasetMagsDAO.findDatasetsWithMagsInDir(directory, dataStore) - datasetsWithAttachmentsInDir <- datasetLayerAttachmentsDAO.findDatasetsWithMagsInDir(directory, dataStore) + datasetsWithMagsInDir <- datasetMagsDAO.findDatasetsWithMagsInDir(directory, dataStore, ignoredDatasetId) + datasetsWithAttachmentsInDir <- datasetLayerAttachmentsDAO.findDatasetsWithAttachmentsInDir(directory, + dataStore, + ignoredDatasetId) } yield (datasetsWithMagsInDir ++ datasetsWithAttachmentsInDir).distinct def deleteDatasetFromDB(datasetId: ObjectId): Fox[Unit] = From 0b17368cb1dac300cadcd2e3c7d2d900f9c59f61 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 23 Oct 2025 10:21:45 +0200 Subject: [PATCH 49/62] remove unused code --- .../com/scalableminds/util/io/PathUtils.scala | 4 +- .../controllers/DataSourceController.scala | 1 - .../datastore/helpers/DatasetDeleter.scala | 330 ++---------------- .../services/DataSourceService.scala | 4 +- .../services/DataSourceToDiskWriter.scala | 4 +- .../services/uploading/UploadService.scala | 2 +- 6 files changed, 34 insertions(+), 311 deletions(-) diff --git a/util/src/main/scala/com/scalableminds/util/io/PathUtils.scala b/util/src/main/scala/com/scalableminds/util/io/PathUtils.scala index 264739a12be..bbdfa2a81fe 100644 --- 
a/util/src/main/scala/com/scalableminds/util/io/PathUtils.scala +++ b/util/src/main/scala/com/scalableminds/util/io/PathUtils.scala @@ -11,9 +11,7 @@ import scala.jdk.CollectionConverters.IteratorHasAsScala import scala.reflect.io.Directory import scala.util.Random -object PathUtils extends PathUtils - -trait PathUtils extends LazyLogging { +object PathUtils extends LazyLogging { private def directoryFilter(path: Path): Boolean = Files.isDirectory(path) && !Files.isHidden(path) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index dd8aa5c3da0..d2d3c0510dc 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -418,7 +418,6 @@ class DataSourceController @Inject()( dataSourceId.directoryName, Some(datasetId), reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" - } yield Ok } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala index 968080e2fd1..5c5b7fb17e7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala @@ -1,333 +1,57 @@ package com.scalableminds.webknossos.datastore.helpers +import com.scalableminds.util.io.PathUtils import com.scalableminds.util.objectid.ObjectId -import com.scalableminds.util.tools.{Fox, FoxImplicits, JsonHelper} -import com.scalableminds.webknossos.datastore.models.datasource.{DataSourceId, StaticLayer, UsableDataSource} -import com.scalableminds.webknossos.datastore.services.{DSRemoteWebknossosClient, DataSourceToDiskWriter} +import com.scalableminds.util.tools.{Fox, FoxImplicits, Full} import com.typesafe.scalalogging.LazyLogging -import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Full} -import org.apache.commons.io.FileUtils -import java.io.File import java.nio.file.{Files, Path} import scala.annotation.tailrec import scala.concurrent.ExecutionContext -trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplicits with DataSourceToDiskWriter { +trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplicits { def dataBaseDir: Path - def existsOnDisk(dataSourceId: DataSourceId, isInConversion: Boolean = false): Boolean = { - val dataSourcePath = - if (isInConversion) - dataBaseDir.resolve(dataSourceId.organizationId).resolve(forConversionDir).resolve(dataSourceId.directoryName) - else dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) - - Files.exists(dataSourcePath) - } - def deleteOnDisk( organizationId: String, datasetName: String, datasetId: Option[ObjectId], // Is only set for datasets that are already registered in WK. In this case, we query WK using this id for symlink paths and move them. 
isInConversion: Boolean = false, reason: Option[String] = None)(implicit ec: ExecutionContext): Fox[Unit] = { - @tailrec - def deleteWithRetry(sourcePath: Path, targetPath: Path, retryCount: Int = 0): Fox[Unit] = - try { - val deduplicatedTargetPath = - if (retryCount == 0) targetPath else targetPath.resolveSibling(f"${targetPath.getFileName} ($retryCount)") - val path = Files.move(sourcePath, deduplicatedTargetPath) - if (path == null) { - throw new Exception("Deleting dataset failed") - } - logger.info(s"Successfully moved dataset from $sourcePath to $targetPath...") - Fox.successful(()) - } catch { - case _: java.nio.file.FileAlreadyExistsException => deleteWithRetry(sourcePath, targetPath, retryCount + 1) - case e: Exception => Fox.failure(s"Deleting dataset failed: ${e.toString}", Full(e)) - } - - def moveToTrash(organizationId: String, - datasetName: String, - dataSourcePath: Path, - reason: Option[String]): Fox[Unit] = - if (Files.exists(dataSourcePath)) { - val trashPath: Path = dataBaseDir.resolve(organizationId).resolve(trashDir) - val targetPath = trashPath.resolve(datasetName) - new File(trashPath.toString).mkdirs() - - logger.info( - s"Deleting dataset by moving it from $dataSourcePath to $targetPath ${reason.map(r => s"because $r").getOrElse("...")}") - deleteWithRetry(dataSourcePath, targetPath) - } else { - Fox.successful(logger.info( - s"Dataset deletion requested for dataset at $dataSourcePath, but it does not exist. Skipping deletion on disk.")) - } val dataSourcePath = if (isInConversion) dataBaseDir.resolve(organizationId).resolve(forConversionDir).resolve(datasetName) else dataBaseDir.resolve(organizationId).resolve(datasetName) - for { - _ <- Fox.runOptional(datasetId)(d => moveSymlinks(organizationId, datasetName, d)) ?~> "Failed to remake symlinks" - _ <- moveToTrash(organizationId, datasetName, dataSourcePath, reason) - } yield () - } - - def remoteWebknossosClient: DSRemoteWebknossosClient - - // Handle references to layers and mags that are deleted + if (Files.exists(dataSourcePath)) { + val trashPath: Path = dataBaseDir.resolve(organizationId).resolve(trashDir) + val targetPath = trashPath.resolve(datasetName) + PathUtils.ensureDirectory(trashPath) - // TODO remove? 
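// The symlink handling in this file always rewrites links with relative targets (see
// relativizeSymlinkPath below). A minimal self-contained sketch of that idea using plain
// java.nio; object, method and variable names here are illustrative assumptions, not part
// of this codebase:
import java.nio.file.{Files, Path}

object RelativeSymlinkSketch {
  // Re-points `link` at `target` via a path relative to the link's parent directory,
  // so the link survives the tree being mounted at a different absolute location.
  def recreateRelativeSymlink(link: Path, target: Path): Path = {
    val relativeTarget = link.getParent.toAbsolutePath.relativize(target.toAbsolutePath)
    // Files.exists follows links (false for broken ones); isSymbolicLink also catches those
    if (Files.exists(link) || Files.isSymbolicLink(link)) Files.delete(link)
    Files.createSymbolicLink(link, relativeTarget)
  }
}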
- private def moveSymlinks(organizationId: String, datasetName: String, datasetId: ObjectId)( - implicit ec: ExecutionContext) = - for { - dataSourceId <- Fox.successful(DataSourceId(datasetName, organizationId)) - layersAndLinkedMags <- remoteWebknossosClient.fetchPaths(datasetId) - exceptionBoxes = layersAndLinkedMags.map(layerMagLinkInfo => - handleLayerSymlinks(dataSourceId, layerMagLinkInfo.layerName, layerMagLinkInfo.magLinkInfos.toList)) - _ <- Fox.assertNoFailure(exceptionBoxes) ?~> "Failed to move symlinks" - affectedDataSources = layersAndLinkedMags - .flatMap(_.magLinkInfos.map(m => m.linkedMags.map(_.dataSourceId))) - .flatten - _ <- updateDatasourceProperties(affectedDataSources) - } yield () - - private def getFullyLinkedLayers(linkedMags: List[MagLinkInfo]): Seq[(DataSourceId, String)] = { - val allMagsLocal = linkedMags.forall(_.mag.hasLocalData) - val allLinkedDatasetLayers = linkedMags.map(_.linkedMags.map(lm => (lm.dataSourceId, lm.dataLayerName))) - // Get combinations of datasourceId, layerName that link to EVERY mag - val linkedToByAllMags = - if (allLinkedDatasetLayers.isEmpty) Seq() - else allLinkedDatasetLayers.reduce((a, b) => a.intersect(b)) - if (allMagsLocal && linkedToByAllMags.nonEmpty) { - linkedToByAllMags + logger.info(s"Deleting dataset ${datasetId + .map(_.toString + " ") + .getOrElse("")}by moving it from $dataSourcePath to $targetPath ${reason.map(r => s"because $r").getOrElse("...")}") + deleteWithRetry(dataSourcePath, targetPath) } else { - Seq() - } - } - - private def relativizeSymlinkPath(targetPath: Path, originPath: Path): Path = { - val absoluteTargetPath = targetPath.toAbsolutePath - val relativeTargetPath = originPath.getParent.toAbsolutePath.relativize(absoluteTargetPath) - relativeTargetPath - } - - private def getPossibleMagPaths(basePath: Path, magInfo: DataSourceMagInfo): List[Path] = { - val layerPath = basePath - .resolve(magInfo.dataSourceId.organizationId) - .resolve(magInfo.dataSourceId.directoryName) - .resolve(magInfo.dataLayerName) - List(layerPath.resolve(magInfo.mag.toMagLiteral(allowScalar = true)), - layerPath.resolve(magInfo.mag.toMagLiteral(allowScalar = false))) - } - - private def updateDatasourceProperties(dataSourceIds: List[DataSourceId])( - implicit ec: ExecutionContext): Fox[List[Unit]] = - // We need to update locally explored datasets, since they now may have symlinks where previously they only had the - // path property set. 
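// Note on getPossibleMagPaths above: a mag directory may exist on disk under either the
// scalar or the dashed mag literal, which is why two candidate paths are probed. For an
// isotropic mag 2 that is presumably "<layer>/2" and "<layer>/2-2-2"; for an anisotropic
// mag such as 2-2-1 both calls yield "<layer>/2-2-1" (my reading of toMagLiteral, not a
// verified API guarantee).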
- Fox.serialCombined(dataSourceIds)(dataSourceId => { - val dataSourcePath = dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName) - val propertiesPath = dataSourcePath.resolve(UsableDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON) - if (Files.exists(propertiesPath)) { - JsonHelper.parseFromFileAs[UsableDataSource](propertiesPath, dataBaseDir) match { - case Full(dataSource) => - val updatedDataSource = dataSource.copy( - id = dataSourceId, - dataLayers = dataSource.dataLayers.map { - case dl: StaticLayer => - if (dl.mags.forall(_.path.exists(p => p.isLocal && p.isAbsolute))) { - // Setting path to None means using resolution of layer/mag directories to access data - dl.mapped(magMapping = _.copy(path = None)) - } else { - dl - } - case dl => dl - } - ) - // Write properties back - updateDataSourceOnDisk(updatedDataSource, expectExisting = true, validate = false) - case _ => Fox.successful(()) - } - } else Fox.successful(()) - }) - - private def updateMagSymlinks(targetMagPath: Path, linkedMag: DataSourceMagInfo): Unit = { - val linkedMagPaths = getPossibleMagPaths(dataBaseDir, linkedMag) - // Before deleting, check write permissions at linkedMagPath - if (!Files.isWritable(linkedMagPaths.head.getParent)) { - throw new Exception(s"Cannot update symlink at ${linkedMagPaths.head}, no write permissions!") - } - val existingLinkedMagPath = linkedMagPaths.find(p => Files.exists(p) || Files.isSymbolicLink(p)) - - existingLinkedMagPath match { - case Some(linkedMagPath) => - Files.delete(linkedMagPath) - logger.info(s"Deleting symlink and recreating it at $linkedMagPath") - Files.createSymbolicLink(linkedMagPath, relativizeSymlinkPath(targetMagPath, linkedMagPath)) - case None => - val linkedMagPath = linkedMagPaths.head - if (!Files.exists(linkedMagPath) && linkedMag.path == linkedMag.realPath) { - // This is the case for locally explored datasets - // Since locally explored datasets are always fully linked layers when explored, this case can - // only happen if one of the mags was manually edited in the properties file. - Files.createSymbolicLink(linkedMagPath, relativizeSymlinkPath(targetMagPath, linkedMagPath)) - } else { - logger.warn(s"Trying to recreate symlink at mag $linkedMagPath, but it does not exist!") - } + Fox.successful(logger.info( + s"Dataset deletion requested for dataset at $dataSourcePath, but it does not exist. Skipping deletion on disk.")) } } - private def moveLayer(sourceDataSource: DataSourceId, - sourceLayer: String, - fullLayerLinks: Seq[(DataSourceId, String)], - layerMags: List[MagLinkInfo]): Unit = { - // Move layer on disk - val layerPath = - dataBaseDir.resolve(sourceDataSource.organizationId).resolve(sourceDataSource.directoryName).resolve(sourceLayer) - - if (fullLayerLinks.isEmpty) { - throw new IllegalArgumentException( - s"Cannot move layer $sourceLayer from $sourceDataSource, no fully linked layers provided!") - } - - // Select one of the fully linked layers as target to move layer to - // Selection of the first one is arbitrary, is there anything to distinguish between them? 
- val target = fullLayerLinks.head - val moveToDataSource = target._1 - val moveToDataLayer = target._2 - val targetPath = dataBaseDir - .resolve(moveToDataSource.organizationId) - .resolve(moveToDataSource.directoryName) - .resolve(moveToDataLayer) - - // Before deleting, check write permissions at targetPath - if (!Files.isWritable(targetPath.getParent)) { - throw new Exception(s"Cannot move layer $sourceLayer to $targetPath, no write permissions!") - } - - logger.info( - s"Found complete symlinks to layer; Moving layer $sourceLayer from $sourceDataSource to $moveToDataSource/$moveToDataLayer") - if (Files.exists(targetPath) && Files.isSymbolicLink(targetPath)) { - Files.delete(targetPath) - } - if (Files.exists(targetPath) && Files.isDirectory(targetPath)) { - // This happens when the fully linked layer consists of mag symlinks. The directory exists and is full of symlinked mags. - // We need to delete the directory before moving the layer. - FileUtils.deleteDirectory(targetPath.toFile) - } - Files.move(layerPath, targetPath) - - // All symlinks are now broken, we need to recreate them - // There may be more layers that are "fully linked", where we need to add only one symlink - - fullLayerLinks.tail.foreach { linkedLayer => - val linkedLayerPath = - dataBaseDir.resolve(linkedLayer._1.organizationId).resolve(linkedLayer._1.directoryName).resolve(linkedLayer._2) - // Before deleting, check write permissions at linkedLayerPath - if (!Files.isWritable(linkedLayerPath.getParent)) { - throw new Exception(s"Cannot move layer $sourceLayer to $targetPath, no write permissions!") - } - if (Files.exists(linkedLayerPath) || Files.isSymbolicLink(linkedLayerPath)) { - // Two cases exist here: 1. The layer is a regular directory where each mag is a symlink - // 2. The layer is a symlink to the other layer itself. - // We can handle both by deleting the layer and creating a new symlink. - if (Files.isDirectory(linkedLayerPath)) { // Case 1 - FileUtils.deleteDirectory(linkedLayerPath.toFile) - } else { // Case 2 - Files.delete(linkedLayerPath) - } - logger.info( - s"Deleting existing symlink(s) at $linkedLayerPath linking to $sourceDataSource/$sourceLayer, creating new symlink") - Files.createSymbolicLink(linkedLayerPath, relativizeSymlinkPath(targetPath, linkedLayerPath)) - } else { - if (!Files.exists(linkedLayerPath)) { - // This happens when the layer is a locally explored dataset, where the path is directly written into the properties - // and no layer directory actually exists. 
- Files.createSymbolicLink(linkedLayerPath, relativizeSymlinkPath(targetPath, linkedLayerPath)) - } else { - // This should not happen, since we got the info from WK that a layer exists here - logger.warn(s"Trying to recreate symlink at layer $linkedLayerPath, but it does not exist!") - } + @tailrec + private def deleteWithRetry(sourcePath: Path, targetPath: Path, retryCount: Int = 0)( + implicit ec: ExecutionContext): Fox[Unit] = + try { + val deduplicatedTargetPath = + if (retryCount == 0) targetPath else targetPath.resolveSibling(f"${targetPath.getFileName} ($retryCount)") + val path = Files.move(sourcePath, deduplicatedTargetPath) + if (path == null) { + throw new Exception("Deleting dataset failed") } + logger.info(s"Successfully moved dataset from $sourcePath to $targetPath...") + Fox.successful(()) + } catch { + case _: java.nio.file.FileAlreadyExistsException => deleteWithRetry(sourcePath, targetPath, retryCount + 1) + case e: Exception => Fox.failure(s"Deleting dataset failed: ${e.toString}", Full(e)) } - // For every mag that linked to this layer, we need to update the symlink - // We need to discard the already handled mags (fully linked layers) - - layerMags.foreach { magLinkInfo => - val mag = magLinkInfo.mag - val newMagPath = - Seq(targetPath.resolve(mag.mag.toMagLiteral(true)), targetPath.resolve(mag.mag.toMagLiteral(false))) - .find(Files.exists(_)) - .getOrElse( - throw new Exception(s"Cleaning up move failed for $mag, no local data found ${targetPath.resolve(mag.mag - .toMagLiteral(true))} or ${targetPath.resolve(mag.mag.toMagLiteral(false))}, failed to create symlink!")) - magLinkInfo.linkedMags - .filter(linkedMag => !fullLayerLinks.contains((linkedMag.dataSourceId, linkedMag.dataLayerName))) // Filter out mags that are fully linked layers, we already handled them - .foreach { linkedMag => - updateMagSymlinks(newMagPath, linkedMag) - } - } - - } - - private def handleLayerSymlinks(dataSourceId: DataSourceId, - layerName: String, - linkedMags: List[MagLinkInfo]): Box[Unit] = - tryo { - val fullyLinkedLayers = getFullyLinkedLayers(linkedMags) - if (fullyLinkedLayers.nonEmpty) { - moveLayer(dataSourceId, layerName, fullyLinkedLayers, linkedMags) - } else { - logger.info(s"Found incomplete symlinks to layer; Moving mags from $dataSourceId to other datasets") - linkedMags.foreach { magLinkInfo => - val magToDelete = magLinkInfo.mag - if (magLinkInfo.linkedMags.nonEmpty) { - if (magToDelete.hasLocalData) { - // Move mag to a different dataset - val magPath = getPossibleMagPaths(dataBaseDir, magToDelete).find(Files.exists(_)).getOrElse { - throw new IllegalArgumentException( - s"Cannot move mag $magToDelete, no local data found at ${magToDelete.path}!") - } - // Select an arbitrary linked mag to move to - val target = magLinkInfo.linkedMags.head - val possibleMagTargetPaths = getPossibleMagPaths(dataBaseDir, target) - - // Before deleting, check write permissions at targetPath - if (!Files.isWritable(possibleMagTargetPaths.head.getParent)) { - throw new Exception( - s"Cannot move mag $magToDelete to ${possibleMagTargetPaths.head.getParent}, no write permissions!") - } - - val targetPathExistingSymlink = possibleMagTargetPaths.find(Files.isSymbolicLink) - targetPathExistingSymlink match { - case Some(targetPath) => - logger.info( - s"Deleting existing symlink at $targetPath linking to ${Files.readSymbolicLink(targetPath)}") - Files.delete(targetPath) - case _ => () - } - val targetPath = targetPathExistingSymlink.getOrElse(possibleMagTargetPaths.head) - Files.move(magPath, 
targetPath) - - // Move all symlinks to this mag to link to the moved mag - magLinkInfo.linkedMags.tail.foreach { linkedMag => - updateMagSymlinks(targetPath, linkedMag) - } - } else { - // The mag has no local data but there are links to it... - // Mags without local data are either - // 1. remote and thus they have no mags that can be linked to (but also we do not need to delete anything more here) - // 2. are links themselves to other mags. In this case, there can't be any links to this here since they - // would be resolved to the other mag. - // 3. locally explored datasets. They don't have layer directories that could have symlinks to them, so - // this is also not a problem. - // So this should not happen. - logger.warn(s"Trying to move mag $magToDelete, but it has no local data!") - } - } - } - } - } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala index 96fad9a85e5..a798029b6c9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala @@ -307,11 +307,13 @@ class DataSourceService @Inject()( for { _ <- Fox.serialCombined(localPaths) { _.toLocalPath.flatMap { - deleteDirectoryRecursively + PathUtils.deleteDirectoryRecursively }.toFox } _ <- managedS3Service.deletePaths(managedS3Paths) } yield () } + def existsOnDisk(dataSourceId: DataSourceId): Boolean = + Files.exists(dataBaseDir.resolve(dataSourceId.organizationId).resolve(dataSourceId.directoryName)) } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceToDiskWriter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceToDiskWriter.scala index 51ab729324d..7d7b775e1b0 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceToDiskWriter.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceToDiskWriter.scala @@ -12,7 +12,7 @@ import java.nio.file.{Files, Path} import scala.concurrent.ExecutionContext import scala.io.Source -trait DataSourceToDiskWriter extends PathUtils with DataSourceValidation with FoxImplicits { +trait DataSourceToDiskWriter extends DataSourceValidation with FoxImplicits { private val propertiesFileName = Path.of(UsableDataSource.FILENAME_DATASOURCE_PROPERTIES_JSON) private val logFileName = Path.of("datasource-properties-backups.log") @@ -27,7 +27,7 @@ trait DataSourceToDiskWriter extends PathUtils with DataSourceValidation with Fo for { _ <- Fox.runIf(validate)(assertValidDataSource(dataSource).toFox) propertiesFile = dataSourcePath.resolve(propertiesFileName) - _ <- Fox.runIf(!expectExisting)(ensureDirectoryBox(dataSourcePath).toFox) + _ <- Fox.runIf(!expectExisting)(PathUtils.ensureDirectoryBox(dataSourcePath).toFox) _ <- Fox.runIf(!expectExisting)(Fox.fromBool(!Files.exists(propertiesFile))) ?~> "dataSource.alreadyPresent" _ <- Fox.runIf(expectExisting)(backupPreviousProperties(dataSourcePath).toFox) ?~> "Could not update datasource-properties.json" dataSourceWithRelativizedPaths = relativizePathsOfDataSource(dataSourcePath, dataSource) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index 5db64024f80..a1121aa2ff9 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -352,7 +352,7 @@ class UploadService @Inject()(dataSourceService: DataSourceService, _ <- assertWithinRequestedFileSizeAndCleanUpOtherwise(uploadDir, uploadId) _ <- checkAllChunksUploaded(uploadId) unpackToDir = unpackToDirFor(dataSourceId) - _ <- ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" + _ <- PathUtils.ensureDirectoryBox(unpackToDir.getParent).toFox ?~> "dataset.import.fileAccessDenied" unpackResult <- unpackDataset(uploadDir, unpackToDir, datasetId).shiftBox _ <- cleanUpUploadedDataset(uploadDir, uploadId) _ <- cleanUpOnFailure(unpackResult, From cb52d47ae0522b532952c5a7d08eceeb498ed0c6 Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 23 Oct 2025 10:44:48 +0200 Subject: [PATCH 50/62] fixes --- app/models/dataset/Dataset.scala | 4 +- app/models/dataset/DatasetService.scala | 59 ++++++++----------- .../dataset/WKRemoteDataStoreClient.scala | 2 +- .../controllers/DataSourceController.scala | 2 +- .../datastore/helpers/DatasetDeleter.scala | 16 +++-- .../services/uploading/UploadService.scala | 8 +-- 6 files changed, 41 insertions(+), 50 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index ada64f0f74a..9a6476c98db 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -926,7 +926,7 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte JOIN webknossos.datasets d ON m._dataset = d._id WHERE starts_with(m.realpath, $absolutePathWithTrailingSlash) AND d._id != $ignoredDataset - AND d.dataStore = ${dataStore.name.trim} + AND d._datastore = ${dataStore.name.trim} """.as[ObjectId]) } @@ -1326,7 +1326,7 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex JOIN webknossos.datasets d ON a._dataset = d._id WHERE starts_with(a.path, $absolutePathWithTrailingSlash) AND d._id != $ignoredDataset - AND d.dataStore = ${dataStore.name.trim} + AND d._datastore = ${dataStore.name.trim} """.as[ObjectId]) } } diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 22949cb9e1a..837836332bb 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -530,15 +530,10 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, }) } yield () - private def deleteUnusableDataset(dataset: Dataset): Fox[Unit] = ??? // TODO - def deleteDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = - if (!dataset.isUsable) { - deleteUnusableDataset(dataset) - } else { - for { + for { - /* Find paths not used by other datasets (neither as realpath nor as path), delete those + /* Find paths not used by other datasets (neither as realpath nor as path), delete those (Caution, what if symlink chains go through this dataset? 
those won’t be detected as realpaths) If virtual: - find paths not used by other datasets (neither as realpath nor as path), delete those @@ -550,32 +545,30 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, - else: - abort Delete in the DB if no annotations reference it, otherwise mark as deleted and clear datasource - */ - datastoreClient <- clientFor(dataset) - _ <- if (dataset.isVirtual) { - for { - magPathsUsedOnlyByThisDataset <- datasetMagsDAO.findPathsUsedOnlyByThisDataset(dataset._id) - attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findPathsUsedOnlyByThisDataset( - dataset._id) - pathsUsedOnlyByThisDataset = magPathsUsedOnlyByThisDataset ++ attachmentPathsUsedOnlyByThisDataset - // Note that the datastore only deletes local paths and paths on our managed S3 cloud storage - _ <- datastoreClient.deletePaths(pathsUsedOnlyByThisDataset) - } yield () - } else { - for { - datastoreBaseDirStr <- datastoreClient.getBaseDirAbsolute - datastoreBaseDir <- UPath.fromString(datastoreBaseDirStr).toFox - datasetDir = datastoreBaseDir / dataset._organization / dataset.directoryName - datastore <- dataStoreFor(dataset) - datasetsUsingDataFromThisDir <- findDatasetsUsingDataFromDir(datasetDir, datastore, dataset._id) - _ <- Fox.fromBool(datasetsUsingDataFromThisDir.isEmpty) ?~> s"Cannot delete dataset because ${datasetsUsingDataFromThisDir.length} other datasets reference its data: ${datasetsUsingDataFromThisDir - .mkString(",")}" - _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" - } yield () - } - _ <- deleteDatasetFromDB(dataset._id) - } yield () - } + */ + datastoreClient <- clientFor(dataset) + _ <- if (dataset.isVirtual) { + for { + magPathsUsedOnlyByThisDataset <- datasetMagsDAO.findPathsUsedOnlyByThisDataset(dataset._id) + attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findPathsUsedOnlyByThisDataset(dataset._id) + pathsUsedOnlyByThisDataset = magPathsUsedOnlyByThisDataset ++ attachmentPathsUsedOnlyByThisDataset + // Note that the datastore only deletes local paths and paths on our managed S3 cloud storage + _ <- datastoreClient.deletePaths(pathsUsedOnlyByThisDataset) + } yield () + } else { + for { + datastoreBaseDirStr <- datastoreClient.getBaseDirAbsolute + datastoreBaseDir <- UPath.fromString(datastoreBaseDirStr).toFox + datasetDir = datastoreBaseDir / dataset._organization / dataset.directoryName + datastore <- dataStoreFor(dataset) + datasetsUsingDataFromThisDir <- findDatasetsUsingDataFromDir(datasetDir, datastore, dataset._id) + _ <- Fox.fromBool(datasetsUsingDataFromThisDir.isEmpty) ?~> s"Cannot delete dataset because ${datasetsUsingDataFromThisDir.length} other datasets reference its data: ${datasetsUsingDataFromThisDir + .mkString(",")}" + _ <- datastoreClient.deleteOnDisk(dataset._id) ?~> "dataset.delete.failed" + } yield () + } + _ <- deleteDatasetFromDB(dataset._id) + } yield () private def findDatasetsUsingDataFromDir(directory: UPath, dataStore: DataStore, diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index 2776acf93b2..eaa8939c33b 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -128,7 +128,7 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: RPC) extends LazyLoggin // TODO cache? 
def getBaseDirAbsolute: Fox[String] = - rpc(s"${dataStore.url}/data/baseDirAbsolute") + rpc(s"${dataStore.url}/data/datasets/baseDirAbsolute") .addQueryParam("token", RpcTokenHolder.webknossosToken) .getWithJsonResponse[String] diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala index d2d3c0510dc..23f8fb30587 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/controllers/DataSourceController.scala @@ -414,9 +414,9 @@ class DataSourceController @Inject()( dataSource <- dsRemoteWebknossosClient.getDataSource(datasetId) ~> NOT_FOUND dataSourceId = dataSource.id _ <- dataSourceService.deleteOnDisk( + datasetId, dataSourceId.organizationId, dataSourceId.directoryName, - Some(datasetId), reason = Some("the user wants to delete the dataset")) ?~> "dataset.delete.failed" } yield Ok } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala index 5c5b7fb17e7..379004293c6 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala @@ -11,12 +11,11 @@ import scala.concurrent.ExecutionContext trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplicits { def dataBaseDir: Path - def deleteOnDisk( - organizationId: String, - datasetName: String, - datasetId: Option[ObjectId], // Is only set for datasets that are already registered in WK. In this case, we query WK using this id for symlink paths and move them. 
- isInConversion: Boolean = false, - reason: Option[String] = None)(implicit ec: ExecutionContext): Fox[Unit] = { + def deleteOnDisk(datasetId: ObjectId, + organizationId: String, + datasetName: String, + isInConversion: Boolean = false, + reason: Option[String] = None)(implicit ec: ExecutionContext): Fox[Unit] = { val dataSourcePath = if (isInConversion) dataBaseDir.resolve(organizationId).resolve(forConversionDir).resolve(datasetName) @@ -27,9 +26,8 @@ trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplici val targetPath = trashPath.resolve(datasetName) PathUtils.ensureDirectory(trashPath) - logger.info(s"Deleting dataset ${datasetId - .map(_.toString + " ") - .getOrElse("")}by moving it from $dataSourcePath to $targetPath ${reason.map(r => s"because $r").getOrElse("...")}") + logger.info( + s"Deleting dataset $datasetId by moving it from $dataSourcePath to $targetPath ${reason.map(r => s"because $r").getOrElse("...")}") deleteWithRetry(dataSourcePath, targetPath) } else { Fox.successful(logger.info( diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala index a1121aa2ff9..bbb250b619b 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/uploading/UploadService.scala @@ -557,17 +557,17 @@ class UploadService @Inject()(dataSourceService: DataSourceService, case Full(_) => Fox.successful(()) case Empty => - deleteOnDisk(dataSourceId.organizationId, + deleteOnDisk(datasetId, + dataSourceId.organizationId, dataSourceId.directoryName, - None, needsConversion, Some("the upload failed")) Fox.failure(s"Unknown error $label") case Failure(msg, e, _) => logger.warn(s"Error while $label: $msg, $e") - deleteOnDisk(dataSourceId.organizationId, + deleteOnDisk(datasetId, + dataSourceId.organizationId, dataSourceId.directoryName, - None, needsConversion, Some("the upload failed")) remoteWebknossosClient.deleteDataset(datasetId) From 6bda79d0e0fd3248622f7be0f90601b9b78793de Mon Sep 17 00:00:00 2001 From: Florian M Date: Thu, 23 Oct 2025 11:43:28 +0200 Subject: [PATCH 51/62] reset application.conf --- conf/application.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/application.conf b/conf/application.conf index 6dec33dfa5d..50aed2e6cc0 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -157,8 +157,8 @@ features { taskReopenAllowedInSeconds = 30 allowDeleteDatasets = true # to enable jobs for local development, use "yarn enable-jobs" to also activate it in the database - jobsEnabled = true - voxelyticsEnabled = true + jobsEnabled = false + voxelyticsEnabled = false neuronInferralCostPerGVx = 1 mitochondriaInferralCostPerGVx = 0.5 alignmentCostPerGVx = 0.5 From c50f346831ca1675de755a07533151443b1700f7 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 27 Oct 2025 12:00:18 +0100 Subject: [PATCH 52/62] extract stuff to S3UriUtils --- app/models/dataset/DatasetService.scala | 14 ----- .../dataset/WKRemoteDataStoreClient.scala | 3 +- .../datastore/datavault/S3DataVault.scala | 53 +++---------------- .../datastore/helpers/MagLinkInfo.scala | 2 - .../datastore/helpers/S3UriUtils.scala | 50 +++++++++++++++++ .../datastore/services/ManagedS3Service.scala | 12 ++--- 6 files changed, 62 insertions(+), 72 deletions(-) create mode 100644 
webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala index 968718139d2..4b63fb4281b 100644 --- a/app/models/dataset/DatasetService.scala +++ b/app/models/dataset/DatasetService.scala @@ -532,20 +532,6 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO, def deleteDataset(dataset: Dataset)(implicit ctx: DBAccessContext): Fox[Unit] = for { - - /* Find paths not used by other datasets (neither as realpath nor as path), delete those - (Caution, what if symlink chains go through this dataset? those won’t be detected as realpaths) - If virtual: - - find paths not used by other datasets (neither as realpath nor as path), delete those - If not virtual: - - for path in paths: - - find datasets with realpaths pointing to those paths - - if no such datasets, - - delete on disk, no rewriting symlinks - - else: - - abort - Delete in the DB if no annotations reference it, otherwise mark as deleted and clear datasource - */ datastoreClient <- clientFor(dataset) _ <- if (dataset.isVirtual) { for { diff --git a/app/models/dataset/WKRemoteDataStoreClient.scala b/app/models/dataset/WKRemoteDataStoreClient.scala index eaa8939c33b..36256e7921c 100644 --- a/app/models/dataset/WKRemoteDataStoreClient.scala +++ b/app/models/dataset/WKRemoteDataStoreClient.scala @@ -126,8 +126,7 @@ class WKRemoteDataStoreClient(dataStore: DataStore, rpc: RPC) extends LazyLoggin .delete() } yield () - // TODO cache? - def getBaseDirAbsolute: Fox[String] = + lazy val getBaseDirAbsolute: Fox[String] = rpc(s"${dataStore.url}/data/datasets/baseDirAbsolute") .addQueryParam("token", RpcTokenHolder.webknossosToken) .getWithJsonResponse[String] diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala index f82a833041a..ff88d7f29c1 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/datavault/S3DataVault.scala @@ -8,8 +8,8 @@ import com.scalableminds.webknossos.datastore.storage.{ S3AccessKeyCredential } import com.scalableminds.util.tools.Box.tryo -import com.scalableminds.util.tools.{Box, Empty, Full, Failure => BoxFailure} -import com.scalableminds.webknossos.datastore.helpers.UPath +import com.scalableminds.util.tools.{Empty, Full, Failure => BoxFailure} +import com.scalableminds.webknossos.datastore.helpers.{S3UriUtils, UPath} import org.apache.commons.lang3.builder.HashCodeBuilder import play.api.libs.ws.WSClient import software.amazon.awssdk.auth.credentials.{ @@ -49,7 +49,7 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], implicit val ec: ExecutionContext) extends DataVault with FoxImplicits { - private lazy val bucketName = S3DataVault.hostBucketFromUri(uri) match { + private lazy val bucketName = S3UriUtils.hostBucketFromUri(uri) match { case Some(value) => value case None => throw new Exception(s"Could not parse S3 bucket for ${uri.toString}") } @@ -109,7 +109,7 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], implicit ec: ExecutionContext, tc: TokenContext): Fox[(Array[Byte], Encoding.Value)] = for { - objectKey <- S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox + objectKey <- S3UriUtils.objectKeyFromUri(path.toRemoteUriUnsafe).toFox request = range match { case 
StartEnd(r) => getRangeRequest(bucketName, objectKey, r) case SuffixLength(l) => getSuffixRangeRequest(bucketName, objectKey, l) @@ -121,7 +121,7 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], override def listDirectory(path: VaultPath, maxItems: Int)(implicit ec: ExecutionContext): Fox[List[VaultPath]] = for { - prefixKey <- S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox + prefixKey <- S3UriUtils.objectKeyFromUri(path.toRemoteUriUnsafe).toFox s3SubPrefixKeys <- getObjectSummaries(bucketName, prefixKey, maxItems) vaultPaths <- tryo(s3SubPrefixKeys.map(key => new VaultPath(UPath.fromStringUnsafe(s"${uri.getScheme}://$bucketName/$key"), this))).toFox @@ -159,7 +159,7 @@ class S3DataVault(s3AccessKeyCredential: Option[S3AccessKeyCredential], } for { - rawPrefix <- S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox + rawPrefix <- S3UriUtils.objectKeyFromUri(path.toRemoteUriUnsafe).toFox // add a trailing slash only if it's missing prefixKey = if (rawPrefix.endsWith("/")) rawPrefix else rawPrefix + "/" client <- clientFox @@ -192,42 +192,6 @@ object S3DataVault { new S3DataVault(credential, remoteSourceDescriptor.toUriUnsafe, ws, ec) } - // TODO: Move non private methods to trait? - def hostBucketFromUri(uri: URI): Option[String] = { - val host = uri.getHost - if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key - Some(host) - } else if (isVirtualHostedStyle(uri)) { - Some(host.substring(0, host.length - ".s3.amazonaws.com".length)) - } else if (isPathStyle(uri)) { - Some(uri.getPath.substring(1).split("/")(0)) - } else { - None - } - } - - // https://bucket-name.s3.region-code.amazonaws.com/key-name - private def isVirtualHostedStyle(uri: URI): Boolean = - uri.getHost.endsWith(".s3.amazonaws.com") - - // https://s3.region-code.amazonaws.com/bucket-name/key-name - private def isPathStyle(uri: URI): Boolean = - uri.getHost.matches("s3(.[\\w\\-_]+)?.amazonaws.com") || - (!uri.getHost.contains("amazonaws.com") && uri.getHost.contains(".")) - - // S3://bucket-name/key-name - private def isShortStyle(uri: URI): Boolean = - !uri.getHost.contains(".") - - def objectKeyFromUri(uri: URI): Box[String] = - if (isVirtualHostedStyle(uri)) { - Full(uri.getPath) - } else if (isPathStyle(uri)) { - Full(uri.getPath.substring(1).split("/").tail.mkString("/")) - } else if (isShortStyle(uri)) { - Full(uri.getPath.tail) - } else BoxFailure(s"Not a valid s3 uri: $uri") - private def getCredentialsProvider(credentialOpt: Option[S3AccessKeyCredential]): AwsCredentialsProvider = credentialOpt match { case Some(s3AccessKeyCredential: S3AccessKeyCredential) => @@ -242,9 +206,6 @@ object S3DataVault { AnonymousCredentialsProvider.create() } - private def isNonAmazonHost(uri: URI): Boolean = - (isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com")) || uri.getHost == "localhost" - private def determineProtocol(uri: URI, ws: WSClient)(implicit ec: ExecutionContext): Fox[String] = { // If the endpoint supports HTTPS, use it. Otherwise, use HTTP. 
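// REPL-style usage sketch of the three accepted S3 URI styles (expected values follow my
// reading of the hostBucketFromUri/objectKeyFromUri logic; bucket and key names are made up):
import java.net.URI
// short style
S3UriUtils.hostBucketFromUri(new URI("s3://my-bucket/some/key"))                                   // Some("my-bucket")
S3UriUtils.objectKeyFromUri(new URI("s3://my-bucket/some/key"))                                    // Full("some/key")
// virtual-hosted style
S3UriUtils.hostBucketFromUri(new URI("https://my-bucket.s3.amazonaws.com/some/key"))               // Some("my-bucket")
S3UriUtils.objectKeyFromUri(new URI("https://my-bucket.s3.amazonaws.com/some/key"))                // Full("/some/key"), note the leading slash
// path style
S3UriUtils.hostBucketFromUri(new URI("https://s3.eu-central-1.amazonaws.com/my-bucket/some/key"))  // Some("my-bucket")
S3UriUtils.objectKeyFromUri(new URI("https://s3.eu-central-1.amazonaws.com/my-bucket/some/key"))   // Full("some/key")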
val httpsUri = new URI("https", uri.getAuthority, "", "", "") @@ -263,7 +224,7 @@ object S3DataVault { implicit ec: ExecutionContext): Fox[S3AsyncClient] = { val basic = S3AsyncClient.builder().credentialsProvider(getCredentialsProvider(credentialOpt)).crossRegionAccessEnabled(true) - if (isNonAmazonHost(uri)) { + if (S3UriUtils.isNonAmazonHost(uri)) { for { protocol <- determineProtocol(uri, ws) } yield diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala index 2b07d1a2779..c5076828bce 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/MagLinkInfo.scala @@ -26,5 +26,3 @@ case class LayerMagLinkInfo(layerName: String, magLinkInfos: Seq[MagLinkInfo]) object LayerMagLinkInfo { implicit val jsonFormat: Format[LayerMagLinkInfo] = Json.format[LayerMagLinkInfo] } - -// TODO remove? diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala new file mode 100644 index 00000000000..50dde28c970 --- /dev/null +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala @@ -0,0 +1,50 @@ +package com.scalableminds.webknossos.datastore.helpers + +import com.scalableminds.util.tools.{Box, Full, Failure} + +import java.net.URI + +object S3UriUtils { + + def hostBucketFromUri(uri: URI): Option[String] = { + val host = uri.getHost + if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key + Some(host) + } else if (isVirtualHostedStyle(uri)) { + Some(host.substring(0, host.length - ".s3.amazonaws.com".length)) + } else if (isPathStyle(uri)) { + Some(uri.getPath.substring(1).split("/")(0)) + } else { + None + } + } + + def hostBucketFromUpath(path: UPath): Option[String] = + hostBucketFromUri(path.toRemoteUriUnsafe) + + // https://bucket-name.s3.region-code.amazonaws.com/key-name + private def isVirtualHostedStyle(uri: URI): Boolean = + uri.getHost.endsWith(".s3.amazonaws.com") + + // https://s3.region-code.amazonaws.com/bucket-name/key-name + private def isPathStyle(uri: URI): Boolean = + uri.getHost.matches("s3(.[\\w\\-_]+)?.amazonaws.com") || + (!uri.getHost.contains("amazonaws.com") && uri.getHost.contains(".")) + + // S3://bucket-name/key-name + private def isShortStyle(uri: URI): Boolean = + !uri.getHost.contains(".") + + def objectKeyFromUri(uri: URI): Box[String] = + if (isVirtualHostedStyle(uri)) { + Full(uri.getPath) + } else if (isPathStyle(uri)) { + Full(uri.getPath.substring(1).split("/").tail.mkString("/")) + } else if (isShortStyle(uri)) { + Full(uri.getPath.tail) + } else Failure(s"Not a valid s3 uri: $uri") + + def isNonAmazonHost(uri: URI): Boolean = + (isPathStyle(uri) && !uri.getHost.endsWith(".amazonaws.com")) || uri.getHost == "localhost" + +} diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala index 11a9343ce5d..862ba40027d 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala @@ -3,8 +3,7 @@ package 
com.scalableminds.webknossos.datastore.services import com.scalableminds.util.tools.{Box, Fox, FoxImplicits} import com.scalableminds.util.tools.Box.tryo import com.scalableminds.webknossos.datastore.DataStoreConfig -import com.scalableminds.webknossos.datastore.datavault.S3DataVault -import com.scalableminds.webknossos.datastore.helpers.{PathSchemes, UPath} +import com.scalableminds.webknossos.datastore.helpers.{PathSchemes, S3UriUtils, UPath} import com.scalableminds.webknossos.datastore.storage.{CredentialConfigReader, S3AccessKeyCredential} import com.typesafe.scalalogging.LazyLogging import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} @@ -38,7 +37,7 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm } lazy val s3UploadBucketOpt: Option[String] = - S3DataVault.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName)) + S3UriUtils.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName)) private lazy val s3UploadEndpoint: URI = { val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName) @@ -76,7 +75,7 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm } yield S3TransferManager.builder().s3Client(client).build() def deletePaths(paths: Seq[UPath])(implicit ec: ExecutionContext): Fox[Unit] = { - val pathsByBucket: Map[Option[String], Seq[UPath]] = paths.groupBy(bucketForS3UPath) + val pathsByBucket: Map[Option[String], Seq[UPath]] = paths.groupBy(S3UriUtils.hostBucketFromUpath) for { _ <- Fox.serialCombined(pathsByBucket.keys) { bucket: Option[String] => deleteS3PathsOnBucket(bucket, pathsByBucket(bucket)) @@ -89,7 +88,7 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm for { bucket <- bucketOpt.toFox ?~> "Could not determine S3 bucket from UPath" s3Client <- s3ClientBox.toFox ?~> "No managed s3 client configured" - prefixes <- Fox.combined(paths.map(path => S3DataVault.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) + prefixes <- Fox.combined(paths.map(path => S3UriUtils.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) uniqueKeys = keys.distinct _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket") @@ -150,9 +149,6 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm res } - private def bucketForS3UPath(path: UPath): Option[String] = - S3DataVault.hostBucketFromUri(path.toRemoteUriUnsafe) - def pathIsInManagedS3(path: UPath): Boolean = // TODO guard against string prefix false positives path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => path.toString.startsWith(c.name)) From beff7d7165c9f85a6806c9014c31f9044379c8c5 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 27 Oct 2025 13:01:12 +0100 Subject: [PATCH 53/62] use upath for prefix check --- .../webknossos/datastore/services/ManagedS3Service.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala index 862ba40027d..7b31d34bc74 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala +++ 
b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala @@ -150,7 +150,7 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm } def pathIsInManagedS3(path: UPath): Boolean = - // TODO guard against string prefix false positives - path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => path.toString.startsWith(c.name)) + path.getScheme.contains(PathSchemes.schemeS3) && globalCredentials.exists(c => + UPath.fromString(c.name).map(path.startsWith).getOrElse(false)) } From b91144fc0566be4f275c3d14da1e2ab11e57a01f Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 27 Oct 2025 13:27:33 +0100 Subject: [PATCH 54/62] avoid string prefix false positives in startsWith --- app/controllers/DatasetController.scala | 18 +++++++++++------- test/backend/UPathTestSuite.scala | 7 +++++++ .../webknossos/datastore/helpers/UPath.scala | 15 +++++++++++++-- .../datastore/services/ManagedS3Service.scala | 3 ++- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala index db00d0566a0..03e4908e3c8 100755 --- a/app/controllers/DatasetController.scala +++ b/app/controllers/DatasetController.scala @@ -583,13 +583,17 @@ class DatasetController @Inject()(userService: UserService, def delete(datasetId: ObjectId): Action[AnyContent] = sil.SecuredAction.async { implicit request => - for { - dataset <- datasetDAO.findOne(datasetId) ?~> notFoundMessage(datasetId.toString) ~> NOT_FOUND - _ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled" - _ <- Fox.assertTrue(datasetService.isEditableBy(dataset, Some(request.identity))) ?~> "notAllowed" ~> FORBIDDEN - _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ?~> "delete.mustBeOrganizationAdmin" ~> FORBIDDEN - _ <- datasetService.deleteDataset(dataset) - } yield Ok + log() { + for { + dataset <- datasetDAO.findOne(datasetId) ?~> notFoundMessage(datasetId.toString) ~> NOT_FOUND + _ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled" + _ <- Fox.assertTrue(datasetService.isEditableBy(dataset, Some(request.identity))) ?~> "notAllowed" ~> FORBIDDEN + _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ?~> "delete.mustBeOrganizationAdmin" ~> FORBIDDEN + _ = logger.info( + s"Deleting dataset $datasetId (isVirtual=${dataset.isVirtual}) as requested by user ${request.identity._id}...") + _ <- datasetService.deleteDataset(dataset) + } yield Ok + } } def compose(): Action[ComposeRequest] = diff --git a/test/backend/UPathTestSuite.scala b/test/backend/UPathTestSuite.scala index b1de1d825d4..b8ef5f806e4 100644 --- a/test/backend/UPathTestSuite.scala +++ b/test/backend/UPathTestSuite.scala @@ -129,6 +129,8 @@ class UPathTestSuite extends PlaySpec { "correctly answer startsWith" in { assert(UPath.fromStringUnsafe("relative/somewhere").startsWith(UPath.fromStringUnsafe("relative"))) assert(!UPath.fromStringUnsafe("relative/somewhere").startsWith(UPath.fromStringUnsafe("elsewhere"))) + // startsWith compares actual parents, not string prefix! 
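// Worked example of the parent-chain comparison (values follow my reading of RemoteUPath,
// paths are made up): for s3://bucket/a/b the chain that startsWith checks is
//   s3://bucket/a/b, s3://bucket/a
// that is, the path itself plus every ancestor that still has a segment after the authority.
// A string-prefix match like s3://bucket/aXY is therefore rejected, because s3://bucket/a
// is not equal to any element of the chain built for s3://bucket/aXY.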
+ assert(!UPath.fromStringUnsafe("relativeElsewhere").startsWith(UPath.fromStringUnsafe("relative"))) assert(UPath.fromStringUnsafe("/absolute/somewhere").startsWith(UPath.fromStringUnsafe("/absolute"))) assert(!UPath.fromStringUnsafe("/absolute/somewhere").startsWith(UPath.fromStringUnsafe("/elsewhere"))) assert(!UPath.fromStringUnsafe("/absolute/somewhere").startsWith(UPath.fromStringUnsafe("https://example.com"))) @@ -136,6 +138,11 @@ class UPathTestSuite extends PlaySpec { UPath .fromStringUnsafe("https://example.com/path/somewhere") .startsWith(UPath.fromStringUnsafe("https://example.com/path"))) + // startsWith compares actual parents, not string prefix! + assert( + !UPath + .fromStringUnsafe("https://example.com/pathSomewhereElse") + .startsWith(UPath.fromStringUnsafe("https://example.com/path"))) } } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala index 3d8f9ee2c63..3494a0d92f2 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala @@ -7,6 +7,7 @@ import play.api.libs.json.{Format, JsError, JsResult, JsString, JsSuccess, JsVal import java.net.URI import java.nio.file.Path +import scala.collection.mutable.ListBuffer trait UPath { def toRemoteUriUnsafe: URI @@ -179,10 +180,20 @@ private case class RemoteUPath(scheme: String, segments: Seq[String]) extends UP override def basename: String = segments.findLast(_.nonEmpty).getOrElse("") - override def parent: UPath = + override def parent: RemoteUPath = // < 2 check to avoid deleting “authority” (hostname:port) if (segments.length < 2) this else RemoteUPath(scheme, segments.dropRight(1)) + def parents: Seq[RemoteUPath] = { + val listBuffer = ListBuffer[RemoteUPath]() + var current = this + while (current.segments.length >= 2) { + listBuffer.addOne(current) + current = current.parent + } + listBuffer.toSeq + } + override def getScheme: Option[String] = Some(scheme) override def toRemoteUriUnsafe: URI = new URI(toString) @@ -197,7 +208,7 @@ private case class RemoteUPath(scheme: String, segments: Seq[String]) extends UP def startsWith(other: UPath): Boolean = other match { case otherRemote: RemoteUPath => - this.normalize.toString.startsWith(otherRemote.normalize.toString) + this.normalize.parents.contains(otherRemote.normalize) case _ => false } diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala index 7b31d34bc74..0cbe848cbf7 100644 --- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala +++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala @@ -91,8 +91,9 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm prefixes <- Fox.combined(paths.map(path => S3UriUtils.objectKeyFromUri(path.toRemoteUriUnsafe).toFox)) keys: Seq[String] <- Fox.serialCombined(prefixes)(listKeysAtPrefix(s3Client, bucket, _)).map(_.flatten) uniqueKeys = keys.distinct - _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket") + _ = logger.info(s"Deleting ${uniqueKeys.length} objects from managed S3 bucket $bucket...") _ <- Fox.serialCombined(uniqueKeys.grouped(1000).toSeq)(deleteBatch(s3Client, 
bucket, _)).map(_ => ()) + _ = logger.info(s"Successfully deleted ${uniqueKeys.length} objects from managed S3 bucket $bucket.") } yield () private def deleteBatch(s3Client: S3AsyncClient, bucket: String, keys: Seq[String])( From 376ef47a0fda1a2534c214fc2605ead9adc0f3c8 Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 27 Oct 2025 13:33:34 +0100 Subject: [PATCH 55/62] changelog --- unreleased_changes/8924.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 unreleased_changes/8924.md diff --git a/unreleased_changes/8924.md b/unreleased_changes/8924.md new file mode 100644 index 00000000000..72437c593cd --- /dev/null +++ b/unreleased_changes/8924.md @@ -0,0 +1,5 @@ +### Added +- Datasets stored on managed S3 can now also be deleted there. + +### Changed +- Deleting datasets on disk whose layers are still referenced by symlinks of other datasets is now blocked. From 768402e66caf0c9f380c9d4745229c7628d72e4b Mon Sep 17 00:00:00 2001 From: Florian M Date: Mon, 27 Oct 2025 15:39:39 +0100 Subject: [PATCH 56/62] remove unused route --- app/models/dataset/Dataset.scala | 34 ++----------------- app/models/dataset/DatasetService.scala | 22 ------------ conf/webknossos.latest.routes | 1 - .../services/DSRemoteWebknossosClient.scala | 5 --- 4 files changed, 3 insertions(+), 59 deletions(-) diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala index 9a6476c98db..53a0c0accef 100755 --- a/app/models/dataset/Dataset.scala +++ b/app/models/dataset/Dataset.scala @@ -7,7 +7,7 @@ import com.scalableminds.util.time.Instant import com.scalableminds.util.tools.{Fox, JsonHelper} import com.scalableminds.webknossos.datastore.dataformats.MagLocator import com.scalableminds.webknossos.datastore.datareaders.AxisOrder -import com.scalableminds.webknossos.datastore.helpers.{DataSourceMagInfo, UPath} +import com.scalableminds.webknossos.datastore.helpers.UPath import com.scalableminds.webknossos.datastore.models.{LengthUnit, VoxelSize} import com.scalableminds.webknossos.datastore.models.datasource.DatasetViewConfiguration.DatasetViewConfiguration import com.scalableminds.webknossos.datastore.models.datasource.LayerViewConfiguration.LayerViewConfiguration @@ -816,8 +816,8 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte val clearQuery = q"DELETE FROM webknossos.dataset_mags WHERE _dataset = $datasetId".asUpdate val insertQueries = dataLayers.flatMap { layer: StaticLayer => layer.mags.map { mag => - q"""INSERT INTO webknossos.dataset_mags(_dataset, dataLayerName, mag, path, realPath, axisOrder, channelIndex, credentialId) - VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.path}, ${mag.axisOrder.map(Json.toJson(_))}, ${mag.channelIndex}, ${mag.credentialId}) + q"""INSERT INTO webknossos.dataset_mags(_dataset, dataLayerName, mag, path, axisOrder, channelIndex, credentialId) + VALUES($datasetId, ${layer.name}, ${mag.mag}, ${mag.path}, ${mag.axisOrder.map(Json.toJson(_))}, ${mag.channelIndex}, ${mag.credentialId}) """.asUpdate } } @@ -868,34 +868,6 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte } ) - private def rowsToMagInfos(rows: Vector[DataSourceMagRow]): List[DataSourceMagInfo] = { - val mags = rows.map(_.mag) - val dataSources = rows.map(row => DataSourceId(row.directoryName, row._organization)) - rows.toList.zip(mags).zip(dataSources).map { - case ((row, mag), dataSource) => - DataSourceMagInfo(dataSource, row.dataLayerName, mag, row.path, row.realPath, row.hasLocalData) - } - } - - def 
findPathsForDatasetAndDatalayer(datasetId: ObjectId, dataLayerName: String): Fox[List[DataSourceMagInfo]] =
-    for {
-      rows <- run(q"""SELECT _dataset, dataLayerName, mag, path, realPath, hasLocalData, _organization, directoryName
-          FROM webknossos.dataset_mags
-          INNER JOIN webknossos.datasets ON webknossos.dataset_mags._dataset = webknossos.datasets._id
-          WHERE _dataset = $datasetId
-          AND dataLayerName = $dataLayerName""".as[DataSourceMagRow])
-      magInfos = rowsToMagInfos(rows)
-    } yield magInfos
-
-  def findAllByRealPath(realPath: String): Fox[List[DataSourceMagInfo]] =
-    for {
-      rows <- run(q"""SELECT _dataset, dataLayerName, mag, path, realPath, hasLocalData, _organization, directoryName
-          FROM webknossos.dataset_mags
-          INNER JOIN webknossos.datasets ON webknossos.dataset_mags._dataset = webknossos.datasets._id
-          WHERE realPath = $realPath""".as[DataSourceMagRow])
-      magInfos = rowsToMagInfos(rows)
-    } yield magInfos
-
   def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] =
     for {
       pathsStr <- run(q"""
diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala
index 4b63fb4281b..3d08ba52c91 100644
--- a/app/models/dataset/DatasetService.scala
+++ b/app/models/dataset/DatasetService.scala
@@ -497,28 +497,6 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO,
       _ <- Fox.serialCombined(pathInfos)(updateRealPath)
     } yield ()
 
-  /**
-    * Returns a list of tuples, where the first element is the magInfo and the second element is a list of all magInfos
-    * that share the same realPath but have a different dataSourceId. For each mag in the data layer there is one tuple.
-    * @param datasetId id of the dataset
-    * @param layerName name of the layer in the dataset
-    * @return
-    */
-  def getPathsForDataLayer(datasetId: ObjectId,
-                           layerName: String): Fox[List[(DataSourceMagInfo, List[DataSourceMagInfo])]] =
-    for {
-      magInfos <- datasetMagsDAO.findPathsForDatasetAndDatalayer(datasetId, layerName)
-      magInfosAndLinkedMags <- Fox.serialCombined(magInfos)(magInfo =>
-        magInfo.realPath match {
-          case Some(realPath) =>
-            for {
-              pathInfos <- datasetMagsDAO.findAllByRealPath(realPath)
-              filteredPathInfos = pathInfos.filter(_.dataSourceId != magInfo.dataSourceId)
-            } yield (magInfo, filteredPathInfos)
-          case None => Fox.successful((magInfo, List()))
-        })
-    } yield magInfosAndLinkedMags
-
   def validatePaths(paths: Seq[UPath], dataStore: DataStore): Fox[Unit] =
     for {
       _ <- Fox.successful(())
diff --git a/conf/webknossos.latest.routes b/conf/webknossos.latest.routes
index 96d0ad92b6e..904378478f1 100644
--- a/conf/webknossos.latest.routes
+++ b/conf/webknossos.latest.routes
@@ -126,7 +126,6 @@ GET /datastores
 PUT /datastores/:name/datasource controllers.WKRemoteDataStoreController.updateOne(name: String, key: String)
 PUT /datastores/:name/datasources controllers.WKRemoteDataStoreController.updateAll(name: String, key: String, organizationId: Option[String])
 PUT /datastores/:name/datasources/paths controllers.WKRemoteDataStoreController.updatePaths(name: String, key: String)
-GET /datastores/:name/datasources/:datasetId/paths controllers.WKRemoteDataStoreController.getPaths(name: String, key: String, datasetId: ObjectId)
 GET /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.getDataSource(name: String, key: String, datasetId: ObjectId)
 PUT /datastores/:name/datasources/:datasetId controllers.WKRemoteDataStoreController.updateDataSource(name: String, key: String, datasetId: ObjectId)
 PATCH /datastores/:name/status controllers.WKRemoteDataStoreController.statusUpdate(name: String, key: String)
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
index f1c034c0034..3353e5733cd 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
@@ -115,11 +115,6 @@ class DSRemoteWebknossosClient @Inject()(
       .silent
       .putJson(dataSourcePaths)
 
-  def fetchPaths(datasetId: ObjectId): Fox[List[LayerMagLinkInfo]] =
-    rpc(s"$webknossosUri/api/datastores/$dataStoreName/datasources/$datasetId/paths")
-      .addQueryParam("key", dataStoreKey)
-      .getWithJsonResponse[List[LayerMagLinkInfo]]
-
   def reserveDataSourceUpload(info: ReserveUploadInformation)(
       implicit tc: TokenContext): Fox[ReserveAdditionalInformation] =
     for {

From 991c3aff7dce7f36c9caae0f27f94cbf61bf17c5 Mon Sep 17 00:00:00 2001
From: Florian M
Date: Mon, 27 Oct 2025 16:01:26 +0100
Subject: [PATCH 57/62] remove unused code

---
 .../WKRemoteDataStoreController.scala         | 16 ----------------
 app/models/dataset/DatasetService.scala       |  2 +-
 .../services/DSRemoteWebknossosClient.scala   |  2 +-
 3 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/app/controllers/WKRemoteDataStoreController.scala b/app/controllers/WKRemoteDataStoreController.scala
index 2848a160dfe..68f095102c2 100644
--- a/app/controllers/WKRemoteDataStoreController.scala
+++ b/app/controllers/WKRemoteDataStoreController.scala
@@ -5,7 +5,6 @@ import com.scalableminds.util.objectid.ObjectId
 import com.scalableminds.util.time.Instant
 import com.scalableminds.util.tools.Fox
 import com.scalableminds.webknossos.datastore.controllers.JobExportProperties
-import com.scalableminds.webknossos.datastore.helpers.{LayerMagLinkInfo, MagLinkInfo}
 import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
 import com.scalableminds.webknossos.datastore.models.datasource.{
   DataSource,
@@ -230,21 +229,6 @@ class WKRemoteDataStoreController @Inject()(
     }
   }
 
-  def getPaths(name: String, key: String, datasetId: ObjectId): Action[AnyContent] =
-    Action.async { implicit request =>
-      dataStoreService.validateAccess(name, key) { _ =>
-        for {
-          dataset <- datasetDAO.findOne(datasetId)(GlobalAccessContext) ?~> Messages("dataset.notFound", datasetId) ~> NOT_FOUND
-          layers <- datasetLayerDAO.findAllForDataset(dataset._id)
-          magsAndLinkedMags <- Fox.serialCombined(layers)(l => datasetService.getPathsForDataLayer(dataset._id, l.name))
-          magLinkInfos = magsAndLinkedMags.map(_.map { case (mag, linkedMags) => MagLinkInfo(mag, linkedMags) })
-          layersAndMagLinkInfos = layers.zip(magLinkInfos).map {
-            case (layer, magLinkInfo) => LayerMagLinkInfo(layer.name, magLinkInfo)
-          }
-        } yield Ok(Json.toJson(layersAndMagLinkInfos))
-      }
-    }
-
   def getDataSource(name: String, key: String, datasetId: ObjectId): Action[AnyContent] =
     Action.async { implicit request =>
       dataStoreService.validateAccess(name, key) { _ =>
diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala
index 3d08ba52c91..d82879a6155 100644
--- a/app/models/dataset/DatasetService.scala
+++ b/app/models/dataset/DatasetService.scala
@@ -4,7 +4,7 @@ import com.scalableminds.util.accesscontext.{AuthorizedAccessContext, DBAccessCo
 import com.scalableminds.util.objectid.ObjectId
 import com.scalableminds.util.time.Instant
 import com.scalableminds.util.tools.{Empty, EmptyBox, Fox, FoxImplicits, Full, JsonHelper, TextUtils}
-import com.scalableminds.webknossos.datastore.helpers.{DataSourceMagInfo, UPath}
+import com.scalableminds.webknossos.datastore.helpers.UPath
 import com.scalableminds.webknossos.datastore.models.datasource.{
   DataSource,
   DataSourceId,
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
index 3353e5733cd..324cb82fef5 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DSRemoteWebknossosClient.scala
@@ -10,7 +10,7 @@ import com.scalableminds.util.objectid.ObjectId
 import com.scalableminds.util.tools.{Fox, FoxImplicits}
 import com.scalableminds.webknossos.datastore.DataStoreConfig
 import com.scalableminds.webknossos.datastore.controllers.JobExportProperties
-import com.scalableminds.webknossos.datastore.helpers.{IntervalScheduler, LayerMagLinkInfo, UPath}
+import com.scalableminds.webknossos.datastore.helpers.{IntervalScheduler, UPath}
 import com.scalableminds.webknossos.datastore.models.UnfinishedUpload
 import com.scalableminds.webknossos.datastore.models.annotation.AnnotationSource
 import com.scalableminds.webknossos.datastore.models.datasource.{DataSource, DataSourceId}

From d80fec9497dd349413fac621cd4ee8918ea3e5fb Mon Sep 17 00:00:00 2001
From: Florian M
Date: Tue, 28 Oct 2025 09:37:22 +0100
Subject: [PATCH 58/62] add null checks, add comments, improve logging

---
 app/models/dataset/Dataset.scala              | 19 ++++++++------
 app/models/dataset/DatasetService.scala       | 10 ++++----
 .../datastore/helpers/DatasetDeleter.scala    | 25 ++++++++++---------
 .../datastore/helpers/S3UriUtils.scala        |  4 ++-
 .../datastore/services/ManagedS3Service.scala |  2 ++
 5 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/app/models/dataset/Dataset.scala b/app/models/dataset/Dataset.scala
index 53a0c0accef..321cf77d9d1 100755
--- a/app/models/dataset/Dataset.scala
+++ b/app/models/dataset/Dataset.scala
@@ -868,23 +868,25 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte
     }
   )
 
-  def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] =
+  def findMagPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] =
     for {
-      pathsStr <- run(q"""
+      pathsStrOpts <- run(q"""
         SELECT m1.path FROM webknossos.dataset_mags m1
         WHERE m1._dataset = $datasetId
+        AND m1.path IS NOT NULL
         AND NOT EXISTS (
           SELECT m2.path FROM webknossos.dataset_mags m2
           WHERE m2._dataset != $datasetId
           AND (
             m2.path = m1.path
-            OR
-            m2.realpath = m1.realpath
+            OR (
+              m2.realpath IS NOT NULL AND m2.realpath = m1.realpath
+            )
           )
         )
-      """.as[String])
-      paths <- pathsStr.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox
+      """.as[Option[String]])
-      paths <- pathsStrOpts.flatten.map(UPath.fromString).toList.toSingleBox("Invalid UPath").toFox
     } yield paths
 
   def findDatasetsWithMagsInDir(absolutePath: UPath,
@@ -896,7 +898,8 @@ class DatasetMagsDAO @Inject()(sqlClient: SqlClient)(implicit ec: ExecutionConte
       run(q"""
         SELECT d._id FROM webknossos.dataset_mags m
         JOIN webknossos.datasets d ON m._dataset = d._id
-        WHERE starts_with(m.realpath, $absolutePathWithTrailingSlash)
+        WHERE m.realpath IS NOT NULL
+        AND starts_with(m.realpath, $absolutePathWithTrailingSlash)
         AND d._id != $ignoredDataset
         AND d._datastore = ${dataStore.name.trim}
       """.as[ObjectId])
@@ -1272,7 +1275,7 @@ class DatasetLayerAttachmentsDAO @Inject()(sqlClient: SqlClient)(implicit ec: Ex
       """.as[StorageRelevantDataLayerAttachment])
     } yield storageRelevantAttachments.toList
 
-  def findPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] =
+  def findAttachmentPathsUsedOnlyByThisDataset(datasetId: ObjectId): Fox[Seq[UPath]] =
     for {
       pathsStr <- run(q"""
         SELECT a1.path FROM webknossos.dataset_layer_attachments a1
diff --git a/app/models/dataset/DatasetService.scala b/app/models/dataset/DatasetService.scala
index d82879a6155..e47c90d8a11 100644
--- a/app/models/dataset/DatasetService.scala
+++ b/app/models/dataset/DatasetService.scala
@@ -513,8 +513,9 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO,
       datastoreClient <- clientFor(dataset)
       _ <- if (dataset.isVirtual) {
         for {
-          magPathsUsedOnlyByThisDataset <- datasetMagsDAO.findPathsUsedOnlyByThisDataset(dataset._id)
-          attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findPathsUsedOnlyByThisDataset(dataset._id)
+          magPathsUsedOnlyByThisDataset <- datasetMagsDAO.findMagPathsUsedOnlyByThisDataset(dataset._id)
+          attachmentPathsUsedOnlyByThisDataset <- datasetLayerAttachmentsDAO.findAttachmentPathsUsedOnlyByThisDataset(
+            dataset._id)
           pathsUsedOnlyByThisDataset = magPathsUsedOnlyByThisDataset ++ attachmentPathsUsedOnlyByThisDataset
           // Note that the datastore only deletes local paths and paths on our managed S3 cloud storage
           _ <- datastoreClient.deletePaths(pathsUsedOnlyByThisDataset)
@@ -551,9 +552,8 @@ class DatasetService @Inject()(organizationDAO: OrganizationDAO,
       case Full(dataset) =>
         for {
           annotationCount <- annotationDAO.countAllByDataset(dataset._id)(GlobalAccessContext)
-          _ = datasetDAO
-            .deleteDataset(dataset._id, onlyMarkAsDeleted = annotationCount > 0)
-            .flatMap(_ => usedStorageService.refreshStorageReportForDataset(dataset))
+          _ <- datasetDAO.deleteDataset(dataset._id, onlyMarkAsDeleted = annotationCount > 0)
+          _ <- usedStorageService.refreshStorageReportForDataset(dataset)
         } yield ()
       case _ => Fox.successful(())
     }
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
index 379004293c6..9cd8f2b359e 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
@@ -31,25 +31,26 @@ trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplici
         deleteWithRetry(dataSourcePath, targetPath)
       } else {
         Fox.successful(logger.info(
-          s"Dataset deletion requested for dataset at $dataSourcePath, but it does not exist. Skipping deletion on disk."))
+          s"Dataset deletion requested for dataset $datasetId at $dataSourcePath, but it does not exist. Skipping deletion on disk."))
       }
     }
 
   @tailrec
   private def deleteWithRetry(sourcePath: Path, targetPath: Path, retryCount: Int = 0)(
      implicit ec: ExecutionContext): Fox[Unit] =
-    try {
-      val deduplicatedTargetPath =
-        if (retryCount == 0) targetPath else targetPath.resolveSibling(f"${targetPath.getFileName} ($retryCount)")
-      val path = Files.move(sourcePath, deduplicatedTargetPath)
-      if (path == null) {
-        throw new Exception("Deleting dataset failed")
+    if (retryCount > 15) {
+      Fox.failure(s"Deleting dataset failed: too many retries.")
+    } else {
+      try {
+        val deduplicatedTargetPath =
+          if (retryCount == 0) targetPath else targetPath.resolveSibling(f"${targetPath.getFileName} ($retryCount)")
+        Files.move(sourcePath, deduplicatedTargetPath)
+        logger.info(s"Successfully moved dataset from $sourcePath to $targetPath.")
+        Fox.successful(())
+      } catch {
+        case _: java.nio.file.FileAlreadyExistsException => deleteWithRetry(sourcePath, targetPath, retryCount + 1)
+        case e: Exception => Fox.failure(s"Deleting dataset failed: ${e.toString}", Full(e))
       }
-      logger.info(s"Successfully moved dataset from $sourcePath to $targetPath...")
-      Fox.successful(())
-    } catch {
-      case _: java.nio.file.FileAlreadyExistsException => deleteWithRetry(sourcePath, targetPath, retryCount + 1)
-      case e: Exception => Fox.failure(s"Deleting dataset failed: ${e.toString}", Full(e))
     }
   }
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala
index 50dde28c970..290c6363599 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/S3UriUtils.scala
@@ -8,7 +8,9 @@ object S3UriUtils {
 
   def hostBucketFromUri(uri: URI): Option[String] = {
     val host = uri.getHost
-    if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key
+    if (host == null) {
+      None
+    } else if (isShortStyle(uri)) { // assume host is omitted from uri, shortcut form s3://bucket/key
       Some(host)
     } else if (isVirtualHostedStyle(uri)) {
       Some(host.substring(0, host.length - ".s3.amazonaws.com".length))
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
index 0cbe848cbf7..8fbfe284311 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
@@ -37,9 +37,11 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm
   }
 
   lazy val s3UploadBucketOpt: Option[String] =
+    // by convention, the credentialName is the S3 URI so we can extract the bucket from it.
     S3UriUtils.hostBucketFromUri(new URI(dataStoreConfig.Datastore.S3Upload.credentialName))
 
   private lazy val s3UploadEndpoint: URI = {
+    // by convention, the credentialName is the S3 URI so we can extract the bucket from it.
     val credentialUri = new URI(dataStoreConfig.Datastore.S3Upload.credentialName)
     new URI(
       "https",

From cc9908c3cc1bde40efb447fb14f9b8fdd8e9383f Mon Sep 17 00:00:00 2001
From: Florian M
Date: Tue, 28 Oct 2025 09:44:34 +0100
Subject: [PATCH 59/62] remove snapshot test of no-longer existing route

---
 .../__snapshots__/datasets.e2e.ts.snap        | 28 ----------------
 .../backend-snapshot-tests/datasets.e2e.ts    | 32 -------------------
 2 files changed, 60 deletions(-)

diff --git a/frontend/javascripts/test/backend-snapshot-tests/__snapshots__/datasets.e2e.ts.snap b/frontend/javascripts/test/backend-snapshot-tests/__snapshots__/datasets.e2e.ts.snap
index 86501468034..5aaffe23e01 100644
--- a/frontend/javascripts/test/backend-snapshot-tests/__snapshots__/datasets.e2e.ts.snap
+++ b/frontend/javascripts/test/backend-snapshot-tests/__snapshots__/datasets.e2e.ts.snap
@@ -1,33 +1,5 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`Dataset API (E2E) > Dataset Paths 1`] = `
-[
-  {
-    "layerName": "segmentation",
-    "magLinkInfos": [
-      {
-        "linkedMags": [],
-        "mag": {
-          "dataLayerName": "segmentation",
-          "dataSourceId": {
-            "name": "test-dataset",
-            "team": "Organization_X",
-          },
-          "hasLocalData": true,
-          "mag": [
-            1,
-            1,
-            1,
-          ],
-          "path": "Organization_X/test-dataset/segmentation/1",
-          "realPath": "Organization_X/test-dataset/segmentation/1",
-        },
-      },
-    ],
-  },
-]
-`;
-
 exports[`Dataset API (E2E) > Zarr 3 streaming 1`] = `"{"zarr_format":3,"node_type":"group","attributes":{"ome":{"version":"0.5","multiscales":[{"name":"segmentation","axes":[{"name":"c","type":"channel"},{"name":"x","type":"space","unit":"nanometer"},{"name":"y","type":"space","unit":"nanometer"},{"name":"z","type":"space","unit":"nanometer"}],"datasets":[{"path":"1","coordinateTransformations":[{"type":"scale","scale":[1,11.24,11.24,28]}]}]}]}}}"`;
 
 exports[`Dataset API (E2E) > Zarr 3 streaming 2`] = `"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAA="`;
diff --git a/frontend/javascripts/test/backend-snapshot-tests/datasets.e2e.ts b/frontend/javascripts/test/backend-snapshot-tests/datasets.e2e.ts
index bea1e7509e8..fd4a88c7f26 100644
--- a/frontend/javascripts/test/backend-snapshot-tests/datasets.e2e.ts
+++ b/frontend/javascripts/test/backend-snapshot-tests/datasets.e2e.ts
@@ -174,38 +174,6 @@ describe("Dataset API (E2E)", () => {
     expect(base64).toMatchSnapshot();
   });
 
-  it("Dataset Paths", async () => {
-    const datasetId = await getTestDatasetId();
-    const paths = await fetch(
-      `/api/datastores/localhost/datasources/${datasetId}/paths?key=something-secure`,
-    );
-    const pathsJson = await paths.json();
-
-    // Dataset paths are absolute, we will relativize them here to make the snapshot stable
-    const makeRelative = (path: string) =>
-      path.split("Organization_X")[1] ? "Organization_X" + path.split("Organization_X")[1] : path;
-
-    interface MagLink {
-      mag: {
-        path: string;
-        realPath: string;
-      };
-    }
-
-    interface PathInfo {
-      magLinkInfos: MagLink[];
-    }
-
-    pathsJson.forEach((pathInfo: PathInfo) =>
-      pathInfo.magLinkInfos.forEach((magLink: MagLink) => {
-        magLink.mag.path = makeRelative(magLink.mag.path);
-        magLink.mag.realPath = makeRelative(magLink.mag.realPath);
-      }),
-    );
-
-    expect(pathsJson).toMatchSnapshot();
-  });
-
   /**
    * WARNING: This test creates a side effect by uploading and saving a dataset in your binaryData folder.
   * There is no clean up after the test, and the dataset will remain after each test run.

From 6219144ee2841aceaca084a459f7b8b28b4e746e Mon Sep 17 00:00:00 2001
From: Florian M
Date: Tue, 28 Oct 2025 09:58:37 +0100
Subject: [PATCH 60/62] add duration logging for dataset deletion

---
 app/controllers/DatasetController.scala | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/app/controllers/DatasetController.scala b/app/controllers/DatasetController.scala
index 03e4908e3c8..6e351cce986 100755
--- a/app/controllers/DatasetController.scala
+++ b/app/controllers/DatasetController.scala
@@ -35,6 +35,7 @@ import play.api.libs.json._
 import play.api.mvc.{Action, AnyContent, PlayBodyParsers}
 import play.silhouette.api.Silhouette
 import security.{AccessibleBySwitchingService, URLSharing, WkEnv}
+import telemetry.SlackNotificationService
 import utils.{MetadataAssertions, WkConf}
 
 import javax.inject.Inject
@@ -140,6 +141,7 @@ class DatasetController @Inject()(userService: UserService,
                                   thumbnailCachingService: ThumbnailCachingService,
                                   usedStorageService: UsedStorageService,
                                   conf: WkConf,
+                                  slackNotificationService: SlackNotificationService,
                                   authenticationService: AccessibleBySwitchingService,
                                   analyticsService: AnalyticsService,
                                   mailchimpClient: MailchimpClient,
@@ -584,15 +586,19 @@ class DatasetController @Inject()(userService: UserService,
   def delete(datasetId: ObjectId): Action[AnyContent] =
     sil.SecuredAction.async { implicit request =>
       log() {
-        for {
-          dataset <- datasetDAO.findOne(datasetId) ?~> notFoundMessage(datasetId.toString) ~> NOT_FOUND
-          _ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled"
-          _ <- Fox.assertTrue(datasetService.isEditableBy(dataset, Some(request.identity))) ?~> "notAllowed" ~> FORBIDDEN
-          _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ?~> "delete.mustBeOrganizationAdmin" ~> FORBIDDEN
-          _ = logger.info(
-            s"Deleting dataset $datasetId (isVirtual=${dataset.isVirtual}) as requested by user ${request.identity._id}...")
-          _ <- datasetService.deleteDataset(dataset)
-        } yield Ok
+        logTime(slackNotificationService.noticeSlowRequest) {
+          for {
+            dataset <- datasetDAO.findOne(datasetId) ?~> notFoundMessage(datasetId.toString) ~> NOT_FOUND
+            _ <- Fox.fromBool(conf.Features.allowDeleteDatasets) ?~> "dataset.delete.disabled"
+            _ <- Fox.assertTrue(datasetService.isEditableBy(dataset, Some(request.identity))) ?~> "notAllowed" ~> FORBIDDEN
+            _ <- Fox.fromBool(request.identity.isAdminOf(dataset._organization)) ?~> "delete.mustBeOrganizationAdmin" ~> FORBIDDEN
+            before = Instant.now
+            _ = logger.info(
+              s"Deleting dataset $datasetId (isVirtual=${dataset.isVirtual}) as requested by user ${request.identity._id}...")
+            _ <- datasetService.deleteDataset(dataset)
+            _ = Instant.logSince(before, s"Deleting dataset $datasetId")
+          } yield Ok
+        }
       }
     }

From 1af5acaadc3ba65a6a14e68bf55990b461dc473d Mon Sep 17 00:00:00 2001
From: Florian M
Date: Wed, 29 Oct 2025 13:03:38 +0100
Subject: [PATCH 61/62] simplify UPath.startsWith

---
 .../webknossos/datastore/helpers/UPath.scala | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
index 3494a0d92f2..92c10160907 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
@@ -184,16 +184,6 @@ private case class RemoteUPath(scheme: String, segments: Seq[String]) extends UP
     // < 2 check to avoid deleting “authority” (hostname:port)
     if (segments.length < 2) this else RemoteUPath(scheme, segments.dropRight(1))
 
-  def parents: Seq[RemoteUPath] = {
-    val listBuffer = ListBuffer[RemoteUPath]()
-    var current = this
-    while (current.segments.length >= 2) {
-      listBuffer.addOne(current)
-      current = current.parent
-    }
-    listBuffer.toSeq
-  }
-
   override def getScheme: Option[String] = Some(scheme)
 
   override def toRemoteUriUnsafe: URI = new URI(toString)
@@ -207,8 +197,11 @@ private case class RemoteUPath(scheme: String, segments: Seq[String]) extends UP
   override def toAbsolute: UPath = this
 
   def startsWith(other: UPath): Boolean = other match {
-    case otherRemote: RemoteUPath =>
-      this.normalize.parents.contains(otherRemote.normalize)
+    case otherRemote: RemoteUPath => {
+      val thisNormalized = this.normalize
+      val otherNormalized = otherRemote.normalize
+      thisNormalized.scheme == otherNormalized.scheme && thisNormalized.segments.startsWith(otherNormalized.segments)
+    }
     case _ => false
   }

From 368893d8125e5b3ce0a0bca7d791e6e0eb6d029c Mon Sep 17 00:00:00 2001
From: Florian M
Date: Thu, 30 Oct 2025 09:09:14 +0100
Subject: [PATCH 62/62] implement pr feedback

---
 .../webknossos/datastore/helpers/DatasetDeleter.scala     | 5 +++--
 .../webknossos/datastore/helpers/UPath.scala              | 1 -
 .../webknossos/datastore/services/DataSourceService.scala | 8 ++------
 .../webknossos/datastore/services/ManagedS3Service.scala  | 6 ++----
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
index 9cd8f2b359e..118c178adb4 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/DatasetDeleter.scala
@@ -30,8 +30,9 @@ trait DatasetDeleter extends LazyLogging with DirectoryConstants with FoxImplici
           s"Deleting dataset $datasetId by moving it from $dataSourcePath to $targetPath ${reason.map(r => s"because $r").getOrElse("...")}")
         deleteWithRetry(dataSourcePath, targetPath)
       } else {
-        Fox.successful(logger.info(
-          s"Dataset deletion requested for dataset $datasetId at $dataSourcePath, but it does not exist. Skipping deletion on disk."))
+        logger.info(
+          s"Dataset deletion requested for dataset $datasetId at $dataSourcePath, but it does not exist. Skipping deletion on disk.")
+        Fox.successful(())
       }
     }
 
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
index 92c10160907..f852a2ee9ea 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/helpers/UPath.scala
@@ -7,7 +7,6 @@ import play.api.libs.json.{Format, JsError, JsResult, JsString, JsSuccess, JsVal
 
 import java.net.URI
 import java.nio.file.Path
-import scala.collection.mutable.ListBuffer
 
 trait UPath {
   def toRemoteUriUnsafe: URI
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
index ae98da4c3ee..2d057f8410d 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/DataSourceService.scala
@@ -298,14 +298,10 @@ class DataSourceService @Inject()(
     } yield removedEntriesList.sum
 
   def deletePathsFromDiskOrManagedS3(paths: Seq[UPath]): Fox[Unit] = {
-    val localPaths = paths.filter(_.isLocal)
+    val localPaths = paths.filter(_.isLocal).flatMap(_.toLocalPath)
     val managedS3Paths = paths.filter(managedS3Service.pathIsInManagedS3)
     for {
-      _ <- Fox.serialCombined(localPaths) {
-        _.toLocalPath.flatMap {
-          PathUtils.deleteDirectoryRecursively
-        }.toFox
-      }
+      _ <- Fox.serialCombined(localPaths)(PathUtils.deleteDirectoryRecursively(_).toFox)
       _ <- managedS3Service.deletePaths(managedS3Paths)
     } yield ()
   }
diff --git a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
index 8fbfe284311..21e97f2e628 100644
--- a/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
+++ b/webknossos-datastore/app/com/scalableminds/webknossos/datastore/services/ManagedS3Service.scala
@@ -125,10 +125,8 @@ class ManagedS3Service @Inject()(dataStoreConfig: DataStoreConfig) extends FoxIm
       implicit ec: ExecutionContext): Fox[Seq[String]] = {
     def listRecursive(continuationToken: Option[String], acc: Seq[String]): Fox[Seq[String]] = {
       val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).maxKeys(1000)
-      val request = continuationToken match {
-        case Some(token) => builder.continuationToken(token).build()
-        case None => builder.build()
-      }
+      continuationToken.foreach(builder.continuationToken)
+      val request = builder.build()
       for {
         response <- Fox.fromFuture(s3Client.listObjectsV2(request).asScala)
         keys = response.contents().asScala.map(_.key())