From 2f7ea1c1100d1a4e580ffd951ceccadca2660186 Mon Sep 17 00:00:00 2001
From: Arkadiy Kukarkin
Date: Mon, 27 Oct 2025 11:55:41 +0100
Subject: [PATCH] remove mongodb

---
 .github/actions/go-test-setup/action.yml       |   6 -
 cmd/admin/migrate-dataset.go                   |  33 --
 cmd/admin/migrate-schedule.go                  |  28 --
 cmd/app.go                                     |   2 -
 docs/en/cli-reference/admin/README.md          |   2 -
 .../en/cli-reference/admin/migrate-dataset.md  |  26 --
 .../cli-reference/admin/migrate-schedule.md    |  25 --
 docs/jp/cli-reference/admin/README.md          |   2 -
 .../jp/cli-reference/admin/migrate-dataset.md  |  26 --
 .../cli-reference/admin/migrate-schedule.md    |  25 --
 docs/kr/cli-reference/admin/README.md          |   2 -
 .../kr/cli-reference/admin/migrate-dataset.md  |  27 --
 .../cli-reference/admin/migrate-schedule.md    |  25 --
 docs/zh/cli-reference/admin/README.md          |   2 -
 .../zh/cli-reference/admin/migrate-dataset.md  |  26 --
 .../cli-reference/admin/migrate-schedule.md    |  25 --
 go.mod                                         |   7 +-
 go.sum                                         |   7 -
 migrate/migrate-dataset.go                     | 299 ------------------
 migrate/migrate-dataset_test.go                | 242 --------------
 migrate/migrate-schedule.go                    | 154 ---------
 migrate/migrate-schedule_test.go               | 117 -------
 migrate/types.go                               | 106 -------
 23 files changed, 1 insertion(+), 1213 deletions(-)
 delete mode 100644 cmd/admin/migrate-dataset.go
 delete mode 100644 cmd/admin/migrate-schedule.go
 delete mode 100644 docs/en/cli-reference/admin/migrate-dataset.md
 delete mode 100644 docs/en/cli-reference/admin/migrate-schedule.md
 delete mode 100644 docs/jp/cli-reference/admin/migrate-dataset.md
 delete mode 100644 docs/jp/cli-reference/admin/migrate-schedule.md
 delete mode 100644 docs/kr/cli-reference/admin/migrate-dataset.md
 delete mode 100644 docs/kr/cli-reference/admin/migrate-schedule.md
 delete mode 100644 docs/zh/cli-reference/admin/migrate-dataset.md
 delete mode 100644 docs/zh/cli-reference/admin/migrate-schedule.md
 delete mode 100644 migrate/migrate-dataset.go
 delete mode 100644 migrate/migrate-dataset_test.go
 delete mode 100644 migrate/migrate-schedule.go
 delete mode 100644 migrate/migrate-schedule_test.go
 delete mode 100644 migrate/types.go

diff --git a/.github/actions/go-test-setup/action.yml b/.github/actions/go-test-setup/action.yml
index a1415f13..b7b261fc 100644
--- a/.github/actions/go-test-setup/action.yml
+++ b/.github/actions/go-test-setup/action.yml
@@ -26,12 +26,6 @@ runs:
     - name: Create MySQL database
      shell: bash
      run: mysql -u root -e "create database singularity"
-    - name: Start MongoDB
-      uses: supercharge/mongodb-github-action@v1.10.0
-      if: matrix.os == 'ubuntu'
-      with:
-        mongodb-version: '6.0'
-        mongodb-port: 27018
     - run: |
        echo "GOTESTFLAGS=$GOTESTFLAGS -timeout=30m" >> $GITHUB_ENV
        echo "GO386FLAGS=$GO386FLAGS -timeout=30m" >> $GITHUB_ENV
diff --git a/cmd/admin/migrate-dataset.go b/cmd/admin/migrate-dataset.go
deleted file mode 100644
index 3c4f11f9..00000000
--- a/cmd/admin/migrate-dataset.go
+++ /dev/null
@@ -1,33 +0,0 @@
-package admin
-
-import (
-	"github.com/data-preservation-programs/singularity/migrate"
-	"github.com/urfave/cli/v2"
-)
-
-var MigrateDatasetCmd = &cli.Command{
-	Name:  "migrate-dataset",
-	Usage: "Migrate dataset from old singularity mongodb",
-	Description: "Migrate datasets from singularity V1 to V2. These steps include\n" +
-		"  1. Create source storage and output storage and attach them to a dataprep in V2.\n" +
-		"  2. Create all folder structures and files in the new dataset.\n" +
-		"Caveats:\n" +
-		"  1. The created preparation won't be compatible with the new dataset worker.\n" +
-		"     So do not attempt to resume a data preparation or push new files onto the migrated dataset.\n" +
-		"     You can make deals or browse the dataset without issues.\n" +
-		"  2. The folder CID won't be generated or migrated due to the complexity.",
-	Flags: []cli.Flag{
-		&cli.StringFlag{
-			Name:    "mongo-connection-string",
-			Usage:   "MongoDB connection string",
-			EnvVars: []string{"MONGO_CONNECTION_STRING"},
-			Value:   "mongodb://localhost:27017",
-		},
-		&cli.BoolFlag{
-			Name:  "skip-files",
-			Usage: "Skip migrating details about files and folders. This will make the migration much faster. Useful if you only want to make deals.",
-			Value: false,
-		},
-	},
-	Action: migrate.MigrateDataset,
-}
diff --git a/cmd/admin/migrate-schedule.go b/cmd/admin/migrate-schedule.go
deleted file mode 100644
index 78049f7f..00000000
--- a/cmd/admin/migrate-schedule.go
+++ /dev/null
@@ -1,28 +0,0 @@
-package admin
-
-import (
-	"github.com/data-preservation-programs/singularity/migrate"
-	"github.com/urfave/cli/v2"
-)
-
-var MigrateScheduleCmd = &cli.Command{
-	Name:  "migrate-schedule",
-	Usage: "Migrate schedule from old singularity mongodb",
-	Description: "Migrate schedules from singularity V1 to V2. Note that\n" +
-		"  1. You must complete the dataset migration first\n" +
-		"  2. All new schedules will be created with status 'paused'\n" +
-		"  3. Deal states will not be migrated over, as they will be populated automatically by the deal tracker\n" +
-		"  4. --output-csv is no longer supported. We will provide a new tool in the future\n" +
-		"  5. The number of replicas is no longer supported as part of the schedule. We will make this a configurable policy in the future\n" +
-		"  6. --force is no longer supported. We may add similar support to ignore all policy restrictions in the future\n" +
-		"  7. --offline is no longer supported. Deals will always be offline for the legacy market, and online for the boost market if a URL template is configured",
-	Flags: []cli.Flag{
-		&cli.StringFlag{
-			Name:    "mongo-connection-string",
-			Usage:   "MongoDB connection string",
-			EnvVars: []string{"MONGO_CONNECTION_STRING"},
-			Value:   "mongodb://localhost:27017",
-		},
-	},
-	Action: migrate.MigrateSchedule,
-}
diff --git a/cmd/app.go b/cmd/app.go
index 2d21eab7..32e5ccad 100644
--- a/cmd/app.go
+++ b/cmd/app.go
@@ -120,8 +120,6 @@ Upgrading:
 			Subcommands: []*cli.Command{
 				admin.InitCmd,
 				admin.ResetCmd,
-				admin.MigrateDatasetCmd,
-				admin.MigrateScheduleCmd,
 			},
 		},
 		DownloadCmd,
diff --git a/docs/en/cli-reference/admin/README.md b/docs/en/cli-reference/admin/README.md
index f7e036d6..0e654c78 100644
--- a/docs/en/cli-reference/admin/README.md
+++ b/docs/en/cli-reference/admin/README.md
@@ -11,8 +11,6 @@ USAGE:
 COMMANDS:
    init              Initialize or upgrade the database
    reset             Reset the database
-   migrate-dataset   Migrate dataset from old singularity mongodb
-   migrate-schedule  Migrate schedule from old singularity mongodb
    help, h           Shows a list of commands or help for one command
 
 OPTIONS:
diff --git a/docs/en/cli-reference/admin/migrate-dataset.md b/docs/en/cli-reference/admin/migrate-dataset.md
deleted file mode 100644
index 9ebc72f2..00000000
--- a/docs/en/cli-reference/admin/migrate-dataset.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Migrate dataset from old singularity mongodb
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-dataset - Migrate dataset from old singularity mongodb
-
-USAGE:
-   singularity admin migrate-dataset [command options]
-
-DESCRIPTION:
-   Migrate datasets from singularity V1 to V2. These steps include
-     1. Create source storage and output storage and attach them to a dataprep in V2.
-     2. Create all folder structures and files in the new dataset.
-   Caveats:
-     1. The created preparation won't be compatible with the new dataset worker.
-        So do not attempt to resume a data preparation or push new files onto the migrated dataset.
-        You can make deals or browse the dataset without issues.
-     2. The folder CID won't be generated or migrated due to the complexity.
-
-OPTIONS:
-   --mongo-connection-string value  MongoDB connection string (default: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --skip-files                     Skip migrating details about files and folders. This will make the migration much faster. Useful if you only want to make deals. (default: false)
-   --help, -h                       show help
-```
-{% endcode %}
diff --git a/docs/en/cli-reference/admin/migrate-schedule.md b/docs/en/cli-reference/admin/migrate-schedule.md
deleted file mode 100644
index 42441d36..00000000
--- a/docs/en/cli-reference/admin/migrate-schedule.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Migrate schedule from old singularity mongodb
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-schedule - Migrate schedule from old singularity mongodb
-
-USAGE:
-   singularity admin migrate-schedule [command options]
-
-DESCRIPTION:
-   Migrate schedules from singularity V1 to V2. Note that
-     1. You must complete the dataset migration first
-     2. All new schedules will be created with status 'paused'
-     3. Deal states will not be migrated over, as they will be populated automatically by the deal tracker
-     4. --output-csv is no longer supported. We will provide a new tool in the future
-     5. The number of replicas is no longer supported as part of the schedule. We will make this a configurable policy in the future
-     6. --force is no longer supported. We may add similar support to ignore all policy restrictions in the future
-     7. --offline is no longer supported. Deals will always be offline for the legacy market, and online for the boost market if a URL template is configured
-
-OPTIONS:
-   --mongo-connection-string value  MongoDB connection string (default: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --help, -h                       show help
-```
-{% endcode %}
diff --git a/docs/jp/cli-reference/admin/README.md b/docs/jp/cli-reference/admin/README.md
index 5fec77c9..ab77447d 100644
--- a/docs/jp/cli-reference/admin/README.md
+++ b/docs/jp/cli-reference/admin/README.md
@@ -11,8 +11,6 @@
 コマンド:
    init              データベースを初期化します
    reset             データベースをリセットします
-   migrate-dataset   古い singularity mongodb からデータセットを移行します
-   migrate-schedule  古い singularity mongodb からスケジュールを移行します
    help, h           コマンドの一覧または特定のコマンドのヘルプを表示します
 
 オプション:
diff --git a/docs/jp/cli-reference/admin/migrate-dataset.md b/docs/jp/cli-reference/admin/migrate-dataset.md
deleted file mode 100644
index 65d8a3df..00000000
--- a/docs/jp/cli-reference/admin/migrate-dataset.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# 旧バージョンのSingularity MongoDBからデータセットを移行する
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-dataset - 旧バージョンのSingularity MongoDBからデータセットを移行する
-
-USAGE:
-   singularity admin migrate-dataset [コマンドオプション] [引数...]
-
-DESCRIPTION:
-   Singularity V1からV2へのデータセットの移行を行います。以下の手順が含まれます:
-     1. ソースストレージと出力ストレージを作成し、それらをV2のデータプリップにアタッチします。
-     2. 新しいデータセットにすべてのフォルダの構造とファイルを作成します。
-   注意事項:
-     1. 作成されたデータプリップは新しいデータセットのワーカーと互換性がありません。
-        したがって、データプリップを再開したり、移行されたデータセットに新しいファイルを追加したりしないでください。
-        問題なくデータセットを取引したり、閲覧することはできます。
-     2. フォルダのCIDは、複雑さのために生成されたり移行されたりしません。
-
-OPTIONS:
-   --mongo-connection-string value  MongoDBの接続文字列 (デフォルト: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --skip-files                     ファイルとフォルダの詳細情報の移行をスキップします。これにより移行が高速化されます。取引のみを行いたい場合に便利です。 (デフォルト: false)
-   --help, -h                       ヘルプを表示します
-```
-{% endcode %}
\ No newline at end of file
diff --git a/docs/jp/cli-reference/admin/migrate-schedule.md b/docs/jp/cli-reference/admin/migrate-schedule.md
deleted file mode 100644
index 6077ee36..00000000
--- a/docs/jp/cli-reference/admin/migrate-schedule.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# 旧バージョンのシンギュラリティMongoDBからスケジュールを移行する
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-schedule - 旧バージョンのシンギュラリティMongoDBからスケジュールを移行する
-
-使い方:
-   singularity admin migrate-schedule [コマンドオプション] [引数...]
-
-説明:
-   シンギュラリティV1からV2へのスケジュールの移行を行います。以下の点に注意してください。
-     1. データセットの移行を先に完了する必要があります
-     2. 新しいスケジュールはすべてステータス '一時停止' で作成されます
-     3. 取引状態は移行されません。ディールトラッカーにより自動的にポピュレートされます
-     4. --output-csv はもはやサポートされていません。将来的に新しいツールを提供します
-     5. レプリカの数はスケジュールの一部としてはもはやサポートされていません。将来的には設定可能なポリシーとなります
-     6. --force はもはやサポートされていません。将来的にはすべてのポリシー制限を無視する類似のサポートを追加するかもしれません
-     7. --offline はもはやサポートされていません。URLテンプレートが設定されている場合、従来のマーケットではオフラインディールになり、ブーストマーケットではオンラインディールになります
-
-オプション:
-   --mongo-connection-string value  MongoDB接続文字列 (デフォルト: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --help, -h                       ヘルプを表示
-```
-{% endcode %}
\ No newline at end of file
diff --git a/docs/kr/cli-reference/admin/README.md b/docs/kr/cli-reference/admin/README.md
index 85279b9f..f1914132 100644
--- a/docs/kr/cli-reference/admin/README.md
+++ b/docs/kr/cli-reference/admin/README.md
@@ -11,8 +11,6 @@
 명령어들:
    init              데이터베이스 초기화
    reset             데이터베이스 재설정
-   migrate-dataset   이전 싱귤래리티 몽고DB에서 데이터셋 이관
-   migrate-schedule  이전 싱귤래리티 몽고DB에서 일정 이관
    help, h           명령어 리스트 보여주기 또는 특정 명령어에 대한 도움말 보여주기
 
 옵션:
diff --git a/docs/kr/cli-reference/admin/migrate-dataset.md b/docs/kr/cli-reference/admin/migrate-dataset.md
deleted file mode 100644
index 1734f5b1..00000000
--- a/docs/kr/cli-reference/admin/migrate-dataset.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# 이전 싱귤래리티 MongoDB로부터 데이터셋 마이그레이션하기
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-dataset - 이전 싱귤래리티 MongoDB로부터 데이터셋 마이그레이션하기
-
-사용법:
-   singularity admin migrate-dataset [command options] [arguments...]
-
-설명:
-   싱귤래리티 V1에서 V2로 데이터셋을 마이그레이션합니다. 다음과 같은 단계를 포함합니다.
-     1. 소스 스토리지와 출력 스토리지를 생성하고 V2의 데이터준비(DataPrep)에 연결합니다.
-     2. 새로운 데이터셋에 모든 폴더 구조와 파일을 생성합니다.
-   주의사항:
-     1. 생성된 준비는 새로운 데이터셋 워커와 호환되지 않습니다.
-        따라서 데이터 준비를 재개하거나 마이그레이션된 데이터셋에 새로운 파일을 추가하지 마십시오.
-        이러한 작업은 문제 없이 거래를 진행하거나 데이터셋을 찾아볼 수 있습니다.
-     2. 폴더 CID는 복잡성으로 인해 생성되거나 마이그레이션되지 않습니다.
-
-옵션:
-   --mongo-connection-string value  MongoDB 연결 문자열 (기본값: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --skip-files                     파일과 폴더에 대한 세부 정보 마이그레이션을 건너뜁니다. 이렇게 하면 마이그레이션 속도가 훨씬 빨라집니다.
-                                    거래만 수행하고자 할 경우 유용합니다. (기본값: false)
-   --help, -h                       도움말 표시
-```
-{% endcode %}
\ No newline at end of file
diff --git a/docs/kr/cli-reference/admin/migrate-schedule.md b/docs/kr/cli-reference/admin/migrate-schedule.md
deleted file mode 100644
index 4f49f3c2..00000000
--- a/docs/kr/cli-reference/admin/migrate-schedule.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# 오래된 싱귤래리 모든 사용자의 파일들을 이관한다.
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-schedule - 오래된 싱귤래리 모든 사용자의 파일들을 이관한다.
-
-USAGE:
-   singularity admin migrate-schedule [command options] [arguments...]
-
-DESCRIPTION:
-   싱귤래리 V1에서 V2로 스케줄을 이관한다. 주의사항:
-     1. 데이터셋 이관을 먼저 완료해야 한다.
-     2. 모든 새로운 스케줄은 '일시 정지' 상태로 생성된다.
-     3. 거래 상태는 자동으로 거래 추적기로 채워질 것이므로 이관되지 않는다.
-     4. --output-csv는 더 이상 지원되지 않는다. 앞으로 새로운 도구를 제공할 것이다.
-     5. 레플리카 개수는 스케줄의 일부로 지원되지 않는다. 앞으로 이것은 설정 가능한 정책으로 만들 것이다.
-     6. --force는 더 이상 지원되지 않는다. 앞으로 정책 제한을 모두 무시할 수 있는 유사한 지원을 추가할 수도 있다.
-     7. --offline는 더 이상 지원되지 않는다. URL 템플릿이 설정된 경우 레거시 마켓에는 항상 오프라인 거래로 설정되고 부스트 마켓에는 온라인 거래로 설정될 것이다.
-
-OPTIONS:
-   --mongo-connection-string value  MongoDB 연결 문자열 (기본값: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --help, -h                       도움말 표시
-```
-{% endcode %}
\ No newline at end of file
diff --git a/docs/zh/cli-reference/admin/README.md b/docs/zh/cli-reference/admin/README.md
index 26a99f36..5e960902 100644
--- a/docs/zh/cli-reference/admin/README.md
+++ b/docs/zh/cli-reference/admin/README.md
@@ -11,8 +11,6 @@
 命令:
    init              初始化数据库
    reset             重置数据库
-   migrate-dataset   从旧的singularity mongodb迁移数据集
-   migrate-schedule  从旧的singularity mongodb迁移计划
    help, h           显示命令列表或一个命令的帮助
 
 选项:
diff --git a/docs/zh/cli-reference/admin/migrate-dataset.md b/docs/zh/cli-reference/admin/migrate-dataset.md
deleted file mode 100644
index e8831c68..00000000
--- a/docs/zh/cli-reference/admin/migrate-dataset.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# 从旧的Singularity MongoDB迁移数据集
-
-{% code fullWidth="true" %}
-```
-命令:
-   singularity admin migrate-dataset - 从旧的Singularity MongoDB迁移数据集
-
-使用方法:
-   singularity admin migrate-dataset [命令选项] [参数]
-
-描述:
-   从Singularity V1迁移数据集到V2。步骤包括:
-     1. 在V2中创建源存储和输出存储,并将它们附加到数据准备。
-     2. 在新数据集中创建所有文件夹结构和文件。
-   注意事项:
-     1. 创建的数据准备与新数据集的工作程序不兼容。
-        因此,请勿尝试恢复数据准备或将新文件推送到迁移的数据集。
-        您可以无问题地查看或浏览数据集。
-     2. 由于复杂性,文件夹CID将不会生成或迁移。
-
-选项:
-   --mongo-connection-string value  MongoDB连接字符串(默认值:"mongodb://localhost:27017")[$MONGO_CONNECTION_STRING]
-   --skip-files                     跳过迁移文件和文件夹的详细信息。这将加快迁移速度。如果只想进行交易,则非常有用。(默认值:false)
-   --help, -h                       显示帮助
-```
-{% endcode %}
\ No newline at end of file
diff --git a/docs/zh/cli-reference/admin/migrate-schedule.md b/docs/zh/cli-reference/admin/migrate-schedule.md
deleted file mode 100644
index 96ca431f..00000000
--- a/docs/zh/cli-reference/admin/migrate-schedule.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# 从旧的Singularity MongoDB迁移调度
-
-{% code fullWidth="true" %}
-```
-NAME:
-   singularity admin migrate-schedule - 从旧的Singularity MongoDB迁移调度
-
-用法:
-   singularity admin migrate-schedule [命令选项] [参数...]
-
-描述:
-   从Singularity V1迁移调度到V2。请注意:
-     1. 您必须先完成数据集迁移
-     2. 所有新的调度将被创建为“已暂停”状态
-     3. 交易状态不会被迁移,因为它将自动由交易跟踪器填充
-     4. --output-csv不再支持。我们将在未来提供一个新的工具
-     5. 副本数不再作为调度的一部分支持。我们将在未来将其作为可配置策略
-     6. --force不再支持。我们可能会在未来添加类似的支持来忽略所有策略限制
-     7. --offline不再支持。如果配置了URL模板,传统市场将始终是离线交易,增量市场将始终是在线交易
-
-选项:
-   --mongo-connection-string value  MongoDB连接字符串 (默认: "mongodb://localhost:27017") [$MONGO_CONNECTION_STRING]
-   --help, -h                       显示帮助
-```
-{% endcode %}
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 612b2d2d..27f33117 100644
--- a/go.mod
+++ b/go.mod
@@ -72,7 +72,6 @@ require (
 	github.com/swaggo/swag v1.16.1
 	github.com/urfave/cli/v2 v2.27.3
 	github.com/ybbus/jsonrpc/v3 v3.1.4
-	go.mongodb.org/mongo-driver v1.12.1
 	go.uber.org/multierr v1.11.0
 	go.uber.org/zap v1.27.0
 	golang.org/x/text v0.22.0
@@ -162,7 +161,6 @@ require (
 	github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
-	github.com/golang/snappy v0.0.4 // indirect
 	github.com/google/go-querystring v1.1.0 // indirect
 	github.com/google/gopacket v1.1.19 // indirect
 	github.com/google/pprof v0.0.0-20250202011525-fc3143867406 // indirect
@@ -246,7 +244,6 @@ require (
 	github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect
 	github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
-	github.com/montanaflynn/stats v0.7.0 // indirect
 	github.com/mr-tron/base58 v1.2.0 // indirect
 	github.com/multiformats/go-base32 v0.1.0 // indirect
 	github.com/multiformats/go-base36 v0.2.0 // indirect
@@ -331,15 +328,13 @@ require (
 	github.com/wlynxg/anet v0.0.5 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	github.com/xanzy/ssh-agent v0.3.3 // indirect
-	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
-	github.com/xdg-go/scram v1.1.2 // indirect
-	github.com/xdg-go/stringprep v1.0.4 // indirect
 	github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
 	github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
 	github.com/yunify/qingstor-sdk-go/v3 v3.2.0 // indirect
 	github.com/yusufpapurcu/wmi v1.2.2 // indirect
 	github.com/zeebo/blake3 v0.2.3 // indirect
 	github.com/zeebo/errs v1.3.0 // indirect
+	go.mongodb.org/mongo-driver v1.12.1 // indirect
 	go.opencensus.io v0.24.0 // indirect
 	go.opentelemetry.io/otel v1.28.0 // indirect
 	go.opentelemetry.io/otel/metric v1.28.0 // indirect
diff --git a/go.sum b/go.sum
index 74329b33..246258e7 100644
--- a/go.sum
+++ b/go.sum
@@ -421,8 +421,6 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -833,8 +831,6 @@ github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
-github.com/montanaflynn/stats v0.7.0 h1:r3y12KyNxj/Sb/iOE46ws+3mS1+MZca1wlHQFPsY/JU=
-github.com/montanaflynn/stats v0.7.0/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
 github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
 github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
 github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
@@ -1193,15 +1189,12 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
 github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
-github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
 github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
 github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs=
 github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
-github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
 github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
 github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM=
 github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
-github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
 github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
 github.com/xlab/c-for-go v0.0.0-20200718154222-87b0065af829/go.mod h1:h/1PEBwj7Ym/8kOuMWvO2ujZ6Lt+TMbySEXNhjjR87I=
 github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245/go.mod h1:C+diUUz7pxhNY6KAoLgrTYARGWnt82zWTylZlxT92vk=
diff --git a/migrate/migrate-dataset.go b/migrate/migrate-dataset.go
deleted file mode 100644
index db3c5c74..00000000
--- a/migrate/migrate-dataset.go
+++ /dev/null
@@ -1,299 +0,0 @@
-package migrate
-
-import (
-	"context"
-	"log"
-	"path/filepath"
-	"strings"
-
-	"github.com/cockroachdb/errors"
-	"github.com/data-preservation-programs/singularity/database"
-	"github.com/data-preservation-programs/singularity/model"
-	util2 "github.com/data-preservation-programs/singularity/pack/packutil"
-	"github.com/data-preservation-programs/singularity/pack/push"
-	"github.com/data-preservation-programs/singularity/util"
-	"github.com/ipfs/go-cid"
-	format "github.com/ipfs/go-ipld-format"
-	"github.com/urfave/cli/v2"
-	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-	"gorm.io/gorm"
-)
-
-//nolint:gocritic
-func migrateDataset(ctx context.Context, mg *mongo.Client, db *gorm.DB, scanning ScanningRequest, skipFiles bool) error {
-	sourceType := "local"
-	path := scanning.Path
-	config := make(map[string]string)
-	if strings.HasPrefix(scanning.Path, "s3://") {
-		sourceType = "s3"
-		path = strings.TrimPrefix(scanning.Path, "s3://")
-		config["provider"] = "AWS"
-	}
-
-	preparation := model.Preparation{
-		Name:      scanning.Name,
-		MaxSize:   int64(scanning.MaxSize),
-		PieceSize: int64(util.NextPowerOfTwo(scanning.MaxSize)),
-		SourceStorages: []model.Storage{{
-			Name:   scanning.Name + "-source",
-			Type:   sourceType,
-			Path:   path,
-			Config: config,
-		}},
-		OutputStorages: []model.Storage{{
-			Name: scanning.Name + "-output",
-			Type: "local",
-			Path: scanning.OutDir,
-		}},
-	}
-
-	err := db.Create(&preparation).Error
-	if err != nil {
-		return errors.WithStack(err)
-	}
-
-	var attachment model.SourceAttachment
-	err = db.Where("preparation_id = ? AND storage_id = ?", preparation.ID, preparation.SourceStorages[0].ID).First(&attachment).Error
-	if err != nil {
-		return errors.WithStack(err)
-	}
-
-	rootDir := model.Directory{
-		AttachmentID: attachment.ID,
-		Name:         path,
-	}
-	err = db.Create(&rootDir).Error
-	if err != nil {
-		return errors.WithStack(err)
-	}
-
-	log.Printf("-- Created preparation %s\n", scanning.Name)
-	cursor, err := mg.Database("singularity").Collection("generationrequests").Find(
-		ctx, bson.M{"datasetName": scanning.Name},
-	)
-	if err != nil {
-		return errors.Wrap(err, "failed to query mongo for generation requests")
-	}
-
-	directoryCache := map[string]model.DirectoryID{}
-	directoryCache[""] = rootDir.ID
-	directoryCache["."] = rootDir.ID
-	var lastFile model.File
-	for cursor.Next(ctx) {
-		var generation GenerationRequest
-		err = cursor.Decode(&generation)
-		if err != nil {
-			return errors.Wrap(err, "failed to decode generation request")
-		}
-
-		packJob := model.Job{
-			Type:            model.Pack,
-			State:           model.Complete,
-			ErrorMessage:    generation.ErrorMessage,
-			ErrorStackTrace: "",
-			AttachmentID:    attachment.ID,
-		}
-		err = db.Create(&packJob).Error
-		if err != nil {
-			return errors.WithStack(err)
-		}
-		log.Printf("-- Created pack job %d for %s\n", packJob.ID, scanning.Name)
-
-		pieceCID, err := cid.Parse(generation.PieceCID)
-		if err != nil {
-			log.Printf("failed to parse piece cid %s\n", generation.PieceCID)
-			pieceCID = cid.Undef
-		}
-		dataCID, err := cid.Parse(generation.DataCID)
-		if err != nil {
-			log.Printf("failed to parse data cid %s\n", generation.DataCID)
-			dataCID = cid.Undef
-		}
-		fileName := generation.PieceCID + ".car"
-		if generation.FilenameOverride != "" {
-			fileName = generation.FilenameOverride
-		}
-		car := model.Car{
-			CreatedAt:     generation.CreatedAt,
-			PieceCID:      model.CID(pieceCID),
-			PieceSize:     int64(generation.PieceSize),
-			RootCID:       model.CID(dataCID),
-			FileSize:      int64(generation.CarSize),
-			StoragePath:   filepath.Join(scanning.OutDir, fileName),
-			AttachmentID:  &attachment.ID,
-			PreparationID: preparation.ID,
-		}
-		err = db.Create(&car).Error
-		if err != nil {
-			return errors.Wrap(err, "failed to create car")
-		}
-		log.Printf("-- Created car %s for %s\n", generation.PieceCID, scanning.Name)
-
-		if skipFiles {
-			continue
-		}
-		cursor, err := mg.Database("singularity").Collection("outputfilelists").Find(
-			ctx,
-			bson.M{"generationId": generation.ID.Hex()},
-			options.Find().SetSort(bson.M{"index": 1}))
-		if err != nil {
-			return errors.Wrap(err, "failed to query mongo for output file lists")
-		}
-		var files []model.File
-		for cursor.Next(ctx) {
-			var fileList OutputFileList
-			err = cursor.Decode(&fileList)
-			if err != nil {
-				return errors.Wrap(err, "failed to decode output file list")
-			}
-			for _, generatedFile := range fileList.GeneratedFileList {
-				if generatedFile.CID == "unrecoverable" {
-					continue
-				}
-				if generatedFile.Dir {
-					continue
-				}
-				fileCID, err := cid.Parse(generatedFile.CID)
-				if err != nil {
-					return errors.Wrapf(err, "failed to parse file cid %s", generatedFile.CID)
-				}
-
-				var file model.File
-				if generatedFile.IsComplete() {
-					file = model.File{
-						Path: generatedFile.Path,
-						Size: int64(generatedFile.Size),
-						CID:  model.CID(fileCID),
-						FileRanges: []model.FileRange{
-							{
-								Offset: 0,
-								Length: int64(generatedFile.Size),
-								CID:    model.CID(fileCID),
-								JobID:  &packJob.ID,
-							},
-						},
-						AttachmentID:     attachment.ID,
-						LastModifiedNano: generation.CreatedAt.UnixNano(),
-					}
-				} else if generatedFile.Start == 0 {
-					lastFile = model.File{
-						Path: generatedFile.Path,
-						Size: int64(generatedFile.Size),
-						CID:  model.CID(cid.Undef),
-						FileRanges: []model.FileRange{
-							{
-								Offset: 0,
-								Length: int64(generatedFile.End),
-								CID:    model.CID(fileCID),
-								JobID:  &packJob.ID,
-							},
-						},
-						AttachmentID:     attachment.ID,
-						LastModifiedNano: generation.CreatedAt.UnixNano(),
-					}
-					continue
-				} else {
-					lastFile.FileRanges = append(lastFile.FileRanges, model.FileRange{
-						Offset: int64(generatedFile.Start),
-						Length: int64(generatedFile.End - generatedFile.Start),
-						CID:    model.CID(fileCID),
-						JobID:  &packJob.ID,
-					})
-					if generatedFile.End < generatedFile.Size {
-						continue
-					} else {
-						file = lastFile
-						lastFile = model.File{}
-						links := make([]format.Link, 0)
-						for _, part := range file.FileRanges {
-							links = append(links, format.Link{
-								Size: uint64(part.Length),
-								Cid:  cid.Cid(part.CID),
-							})
-						}
-						_, root, err := util2.AssembleFileFromLinks(links)
-						if err != nil {
-							return errors.Wrap(err, "failed to assemble file from links")
-						}
-						file.CID = model.CID(root.Cid())
-					}
-				}
-				err = push.EnsureParentDirectories(ctx, db, &file, rootDir.ID, directoryCache)
-				if err != nil {
-					return errors.Wrap(err, "failed to ensure parent directories")
-				}
-				directory, ok := directoryCache[filepath.Dir(file.Path)]
-				if !ok {
-					return errors.Errorf("directory %s not found in cache", filepath.Dir(file.Path))
-				}
-				file.DirectoryID = &directory
-				files = append(files, file)
-			}
-		}
-		if len(files) > 0 {
-			err = db.CreateInBatches(&files, util.BatchSize).Error
-			if err != nil {
-				return errors.Wrap(err, "failed to create files")
-			}
-		}
-		log.Printf("-- Created %d files for %s\n", len(files), scanning.Name)
-	}
-
-	return nil
-}
-
-func MigrateDataset(cctx *cli.Context) error {
-	skipFiles := cctx.Bool("skip-files")
-	log.Println("Migrating dataset from old singularity database")
-	mongoConnectionString := cctx.String("mongo-connection-string")
-	sqlConnectionString := cctx.String("database-connection-string")
-	log.Printf("Using mongo connection string: %s\n", mongoConnectionString)
-	log.Printf("Using sql connection string: %s\n", sqlConnectionString)
-	db, closer, err := database.OpenFromCLI(cctx)
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	defer closer.Close()
-	ctx := cctx.Context
-	db = db.WithContext(ctx)
-	mg, err := mongo.Connect(ctx, options.Client().ApplyURI(mongoConnectionString))
-	if err != nil {
-		return errors.Wrap(err, "failed to connect to mongo")
-	}
-
-	err = model.AutoMigrate(db)
-	if err != nil {
-		return errors.Wrap(err, "failed to auto-migrate database")
-	}
-
-	resp, err := mg.Database("singularity").Collection("scanningrequests").Find(ctx, bson.M{})
-	if err != nil {
-		return errors.Wrap(err, "failed to query mongo for scanning requests")
-	}
-
-	var scannings []ScanningRequest
-	err = resp.All(ctx, &scannings)
-	if err != nil {
-		return errors.Wrap(err, "failed to decode mongo response")
-	}
-
-	for _, scanning := range scannings {
-		var datasetExists int64
-		err = db.Model(&model.Storage{}).Where("name = ?", scanning.Name+"-source").Count(&datasetExists).Error
-		if err != nil {
-			return errors.Wrapf(err, "failed to query for dataset %s", scanning.Name)
-		}
-		if datasetExists > 0 {
-			log.Printf("Preparation %s already exists, skipping\n", scanning.Name)
-			continue
-		}
-		log.Printf("Migrating Preparation: %s\n", scanning.Name)
-		err = migrateDataset(ctx, mg, db, scanning, skipFiles)
-		if err != nil {
-			return errors.Wrapf(err, "failed to migrate dataset %s", scanning.Name)
-		}
-	}
-	return nil
-}
diff --git a/migrate/migrate-dataset_test.go b/migrate/migrate-dataset_test.go
deleted file mode 100644
index f95d5b10..00000000
--- a/migrate/migrate-dataset_test.go
+++ /dev/null
@@ -1,242 +0,0 @@
-package migrate
-
-import (
-	"context"
-	"flag"
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	"github.com/cockroachdb/errors"
-	"github.com/data-preservation-programs/singularity/model"
-	"github.com/data-preservation-programs/singularity/util/testutil"
-	"github.com/ipfs/boxo/util"
-	"github.com/ipfs/go-cid"
-	"github.com/stretchr/testify/require"
-	"github.com/urfave/cli/v2"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-	"gorm.io/gorm"
-)
-
-// Using 27018 intentionally to avoid deleting default singularity V1 database
-var localMongoDB = "mongodb://localhost:27018"
-
-func TestMigrateDataset(t *testing.T) {
-	err := setupMongoDBDataset()
-	if err != nil {
-		t.Log(err)
-		t.Skip("Skipping test because MongoDB is not available")
-	}
-	testutil.All(t, func(ctx context.Context, t *testing.T, db *gorm.DB) {
-		flagSet := flag.NewFlagSet("", 0)
-		flagSet.String("mongo-connection-string", localMongoDB, "")
-		flagSet.String("database-connection-string", os.Getenv("DATABASE_CONNECTION_STRING"), "")
-		cctx := cli.NewContext(&cli.App{
-			Writer: os.Stdout,
-		}, flagSet, nil)
-		err = MigrateDataset(cctx)
-		require.NoError(t, err)
-		// Migrate again does nothing
-		err = MigrateDataset(cctx)
-		require.NoError(t, err)
-
-		var preparations []model.Preparation
-		err = db.Preload("SourceStorages").Preload("OutputStorages").Find(&preparations).Error
-		require.NoError(t, err)
-		require.Len(t, preparations, 2)
-		require.Equal(t, "test-source", preparations[0].SourceStorages[0].Name)
-		require.Equal(t, "/path", preparations[0].SourceStorages[0].Path)
-		require.Equal(t, "local", preparations[0].SourceStorages[0].Type)
-		require.EqualValues(t, int64(18*1024*1024*1024), preparations[0].MaxSize)
-		require.EqualValues(t, int64(32*1024*1024*1024), preparations[0].PieceSize)
-		require.Equal(t, "test2-source", preparations[1].SourceStorages[0].Name)
-		require.Equal(t, "s3path", preparations[1].SourceStorages[0].Path)
-		require.Equal(t, "s3", preparations[1].SourceStorages[0].Type)
-		require.Equal(t, filepath.Join("out", "dir"), preparations[0].OutputStorages[0].Path)
-
-		var dirs []model.Directory
-		err = db.Find(&dirs).Error
-		require.NoError(t, err)
-		require.Len(t, dirs, 3)
-		require.Equal(t, "/path", dirs[0].Name)
-		require.Equal(t, "dir", dirs[1].Name)
-		require.Equal(t, "s3path", dirs[2].Name)
-
-		var files []model.File
-		err = db.Find(&files).Error
-		require.NoError(t, err)
-		require.Len(t, files, 3)
-		require.Equal(t, "1.txt", files[0].Path)
-		require.Equal(t, "2.txt", files[1].Path)
-		require.Equal(t, "dir/3.txt", files[2].Path)
-		require.EqualValues(t, 1, *files[0].DirectoryID)
-		require.EqualValues(t, 1, *files[1].DirectoryID)
-		require.EqualValues(t, 2, *files[2].DirectoryID)
-
-		var fileRanges []model.FileRange
-		err = db.Find(&fileRanges).Error
-		require.NoError(t, err)
-		require.Len(t, fileRanges, 5)
-		require.EqualValues(t, 0, fileRanges[0].Offset)
-		require.EqualValues(t, 100, fileRanges[0].Length)
-		require.EqualValues(t, 0, fileRanges[1].Offset)
-		require.EqualValues(t, 20, fileRanges[1].Length)
-		require.EqualValues(t, 20, fileRanges[2].Offset)
-		require.EqualValues(t, 60, fileRanges[2].Length)
-		require.EqualValues(t, 80, fileRanges[3].Offset)
-		require.EqualValues(t, 20, fileRanges[3].Length)
-
-		var packJobs []model.Job
-		err = db.Find(&packJobs).Error
-		require.NoError(t, err)
-		require.Len(t, packJobs, 2)
-		require.EqualValues(t, 1, packJobs[0].AttachmentID)
-		require.Equal(t, model.Complete, packJobs[0].State)
-		require.Equal(t, "error message", packJobs[0].ErrorMessage)
-
-		var cars []model.Car
-		err = db.Find(&cars).Error
-		require.NoError(t, err)
-		require.Len(t, cars, 2)
-		require.EqualValues(t, int64(32*1024*1024*1024), cars[0].PieceSize)
-		require.EqualValues(t, int64(20*1024*1024*1024), cars[0].FileSize)
-		require.EqualValues(t, filepath.Join("out", "dir", "test.car"), cars[0].StoragePath)
-		require.NotEmpty(t, cars[0].PieceCID.String())
-		require.NotEmpty(t, cars[0].RootCID.String())
-		require.EqualValues(t, 1, *cars[0].AttachmentID)
-	})
-}
-
-func setupMongoDBDataset() error {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
-	defer cancel()
-	db, err := mongo.Connect(ctx, options.Client().ApplyURI(localMongoDB))
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	defer db.Disconnect(context.Background())
-	err = db.Database("singularity").Drop(ctx)
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	insertScanningResult, err := db.Database("singularity").Collection("scanningrequests").InsertMany(ctx, []any{ScanningRequest{
-		Name:                  "test",
-		Path:                  "/path",
-		OutDir:                filepath.Join("out", "dir"),
-		MinSize:               uint64(16 * 1024 * 1024 * 1024),
-		MaxSize:               uint64(18 * 1024 * 1024 * 1024),
-		Status:                ScanningStatusCompleted,
-		ErrorMessage:          "error message",
-		TmpDir:                "/tmp/dir",
-		SkipInaccessibleFiles: false,
-	}, ScanningRequest{
-		Name:                  "test2",
-		Path:                  "s3://s3path",
-		OutDir:                filepath.Join("out", "dir"),
-		MinSize:               uint64(16 * 1024 * 1024 * 1024),
-		MaxSize:               uint64(18 * 1024 * 1024 * 1024),
-		Status:                ScanningStatusCompleted,
-		ErrorMessage:          "error message",
-		TmpDir:                "/tmp/dir",
-		SkipInaccessibleFiles: false,
-	}})
-	if err != nil {
-		return errors.WithStack(err)
-	}
-
-	dataCID := cid.NewCidV1(cid.Raw, util.Hash([]byte("test")))
-	pieceCID := cid.NewCidV1(cid.FilCommitmentUnsealed, util.Hash([]byte("test")))
-	insertGenerationResult, err := db.Database("singularity").Collection("generationrequests").InsertMany(ctx, []any{GenerationRequest{
-		DatasetID:             insertScanningResult.InsertedIDs[0].(primitive.ObjectID).Hex(),
-		DatasetName:           "test",
-		Path:                  "/path",
-		OutDir:                filepath.Join("out", "dir"),
-		Index:                 0,
-		Status:                GenerationStatusCompleted,
-		ErrorMessage:          "error message",
-		DataCID:               dataCID.String(),
-		CarSize:               uint64(20 * 1024 * 1024 * 1024),
-		PieceCID:              pieceCID.String(),
-		PieceSize:             uint64(32 * 1024 * 1024 * 1024),
-		FilenameOverride:      "test.car",
-		TmpDir:                "/tmp/dir",
-		SkipInaccessibleFiles: false,
-		CreatedAt:             time.Now(),
-	}, GenerationRequest{
-		DatasetID:             insertScanningResult.InsertedIDs[0].(primitive.ObjectID).Hex(),
-		DatasetName:           "test",
-		Path:                  "/path",
-		OutDir:                filepath.Join("out", "dir"),
-		Index:                 1,
-		Status:                GenerationStatusCompleted,
-		ErrorMessage:          "error message",
-		DataCID:               "unrecoverable",
-		CarSize:               uint64(20 * 1024 * 1024 * 1024),
-		PieceCID:              pieceCID.String(),
-		PieceSize:             uint64(32 * 1024 * 1024 * 1024),
-		FilenameOverride:      "test2.car",
-		TmpDir:                "/tmp/dir",
-		SkipInaccessibleFiles: false,
-		CreatedAt:             time.Now(),
-	}})
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	_, err = db.Database("singularity").Collection("outputfilelists").InsertMany(ctx, []any{OutputFileList{
-		GenerationID: insertGenerationResult.InsertedIDs[0].(primitive.ObjectID).Hex(),
-		Index:        0,
-		GeneratedFileList: []GeneratedFile{{
-			Path: "",
-			Dir:  true,
-		}, {
-			Path:  "1.txt",
-			Dir:   false,
-			CID:   dataCID.String(),
-			Size:  100,
-			Start: 0,
-			End:   100,
-		}, {
-			Path:  "2.txt",
-			Dir:   false,
-			CID:   dataCID.String(),
-			Size:  100,
-			Start: 0,
-			End:   20,
-		}, {
-			Path:  "2.txt",
-			Dir:   false,
-			CID:   dataCID.String(),
-			Size:  100,
-			Start: 20,
-			End:   80,
-		}, {
-			Path:  "2.txt",
-			Dir:   false,
-			CID:   dataCID.String(),
-			Size:  100,
-			Start: 80,
-			End:   100,
-		}, {
-			Path: "dir",
-			Dir:  true,
-		}, {
-			Path:  "dir/3.txt",
-			Dir:   false,
-			CID:   dataCID.String(),
-			Size:  100,
-			Start: 0,
-			End:   0,
-		}, {
-			Path:  "dir/4.txt",
-			Dir:   false,
-			CID:   "unrecoverable",
-			Size:  100,
-			Start: 0,
-			End:   0,
-		}},
-	}})
-	return errors.WithStack(err)
-}
diff --git a/migrate/migrate-schedule.go b/migrate/migrate-schedule.go
deleted file mode 100644
index 07f4f3bb..00000000
--- a/migrate/migrate-schedule.go
+++ /dev/null
@@ -1,154 +0,0 @@
-package migrate
-
-import (
-	"log"
-	"os"
-	"regexp"
-	"strings"
-	"time"
-
-	"github.com/cockroachdb/errors"
-	"github.com/data-preservation-programs/singularity/cmd/cliutil"
-	"github.com/data-preservation-programs/singularity/database"
-	"github.com/data-preservation-programs/singularity/model"
-	"github.com/data-preservation-programs/singularity/util"
-	"github.com/urfave/cli/v2"
-	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-)
-
-var pieceCidRegex = regexp.MustCompile("baga[0-9a-z]+")
-
-func MigrateSchedule(c *cli.Context) error {
-	log.Println("Migrating dataset from old singularity database")
-	mongoConnectionString := c.String("mongo-connection-string")
-	sqlConnectionString := c.String("database-connection-string")
-	log.Printf("Using mongo connection string: %s\n", mongoConnectionString)
-	log.Printf("Using sql connection string: %s\n", sqlConnectionString)
-	db, closer, err := database.OpenFromCLI(c)
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	defer closer.Close()
-	ctx := c.Context
-	db = db.WithContext(ctx)
-	mg, err := mongo.Connect(ctx, options.Client().ApplyURI(c.String("mongo-connection-string")))
-	if err != nil {
-		return errors.Wrap(err, "failed to connect to mongo")
-	}
-
-	var count int64
-	err = db.Model(&model.Schedule{}).Count(&count).Error
-	if err != nil {
-		return errors.Wrap(err, "failed to count schedules")
-	}
-
-	if count > 0 {
-		log.Println("Schedules already exist, skipping")
-		return nil
-	}
-
-	resp, err := mg.Database("singularity").Collection("replicationrequests").Find(ctx, bson.M{})
-	if err != nil {
-		return errors.Wrap(err, "failed to query mongo for scanning requests")
-	}
-
-	var replications []ReplicationRequest
-	err = resp.All(ctx, &replications)
-	if err != nil {
-		return errors.Wrap(err, "failed to decode mongo response")
-	}
-
-	var schedules []model.Schedule
-	for _, replication := range replications {
-		var scanning ScanningRequest
-		oid, err := primitive.ObjectIDFromHex(replication.DatasetID)
-		if err != nil {
-			return errors.Wrapf(err, "failed to parse dataset id %s", replication.DatasetID)
-		}
-		findResult := mg.Database("singularity").Collection("scanningrequests").FindOne(ctx, bson.M{"_id": oid})
-		if findResult.Err() != nil {
-			if err != nil {
-				return errors.Wrapf(err, "failed to find dataset %s", replication.DatasetID)
-			}
-			return errors.Errorf("failed to find dataset %s", replication.DatasetID)
-		}
-
-		err = findResult.Decode(&scanning)
-		if err != nil {
-			return errors.Wrapf(err, "failed to decode dataset %s", replication.DatasetID)
-		}
-
-		var preparation model.Preparation
-		err = preparation.FindByIDOrName(db, scanning.Name)
-		if err != nil {
-			return errors.Wrapf(err, "failed to find preparation %s", scanning.Name)
-		}
-
-		var urlTemplate string
-		if replication.URLPrefix != "" {
-			if !strings.HasSuffix(replication.URLPrefix, "/") {
-				replication.URLPrefix += "/"
-			}
-			urlTemplate = replication.URLPrefix + "{PIECE_CID}"
-		}
-		totalDealNumber := replication.MaxNumberOfDeals
-		var scheduleDealNumber int
-		var maxPendingDealNumber int
-		if replication.CronSchedule != "" {
-			totalDealNumber = replication.CronMaxDeals
-			scheduleDealNumber = int(replication.MaxNumberOfDeals)
-			maxPendingDealNumber = int(replication.CronMaxPendingDeals)
-		}
-		var allowedCIDs model.StringSlice
-		if replication.FileListPath != "" {
-			content, err := os.ReadFile(replication.FileListPath)
-			if err != nil {
-				log.Printf("failed to read file list %s. Skipping...", replication.FileListPath)
-			} else {
-				allowedCIDs = pieceCidRegex.FindAllString(string(content), -1)
-			}
-		}
-
-		for _, provider := range strings.Split(replication.StorageProviders, ",") {
-			if provider == "" {
-				continue
-			}
-			schedule := model.Schedule{
-				CreatedAt:            replication.CreatedAt,
-				UpdatedAt:            replication.UpdatedAt,
-				URLTemplate:          urlTemplate,
-				Provider:             provider,
-				PricePerGBEpoch:      replication.MaxPrice,
-				TotalDealNumber:      int(totalDealNumber),
-				TotalDealSize:        0,
-				Verified:             replication.IsVerified,
-				KeepUnsealed:         true,
-				AnnounceToIPNI:       true,
-				StartDelay:           time.Second * time.Duration(replication.StartDelay) * 30,
-				Duration:             time.Second * time.Duration(replication.Duration) * 30,
-				State:                model.SchedulePaused,
-				ScheduleCron:         replication.CronSchedule,
-				ScheduleDealNumber:   scheduleDealNumber,
-				ScheduleDealSize:     0,
-				MaxPendingDealNumber: maxPendingDealNumber,
-				MaxPendingDealSize:   0,
-				Notes:                replication.Notes,
-				ErrorMessage:         replication.ErrorMessage,
-				AllowedPieceCIDs:     allowedCIDs,
-				PreparationID:        preparation.ID,
-			}
-			schedules = append(schedules, schedule)
-		}
-	}
-
-	err = db.CreateInBatches(&schedules, util.BatchSize).Error
-	if err != nil {
-		return errors.Wrap(err, "failed to create schedules")
-	}
-
-	cliutil.Print(c, schedules)
-	return nil
-}
diff --git a/migrate/migrate-schedule_test.go b/migrate/migrate-schedule_test.go
deleted file mode 100644
index dab02c2f..00000000
--- a/migrate/migrate-schedule_test.go
+++ /dev/null
@@ -1,117 +0,0 @@
-package migrate
-
-import (
-	"context"
-	"flag"
-	"os"
-	"testing"
-	"time"
-
-	"github.com/cockroachdb/errors"
-	"github.com/data-preservation-programs/singularity/model"
-	"github.com/data-preservation-programs/singularity/util/testutil"
-	"github.com/stretchr/testify/require"
-	"github.com/urfave/cli/v2"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-	"gorm.io/gorm"
-)
-
-func TestMigrateSchedule_DatasetNotExist(t *testing.T) {
-	err := setupMongoDBSchedule()
-	if err != nil {
-		t.Log(err)
-		t.Skip("Skipping test because MongoDB is not available")
-	}
-
-	testutil.All(t, func(ctx context.Context, t *testing.T, db *gorm.DB) {
-		flagSet := flag.NewFlagSet("", 0)
-		flagSet.String("mongo-connection-string", localMongoDB, "")
-		flagSet.String("database-connection-string", os.Getenv("DATABASE_CONNECTION_STRING"), "")
-		cctx := cli.NewContext(&cli.App{
-			Writer: os.Stdout,
-		}, flagSet, nil)
-
-		err = db.Create(&model.Preparation{
-			Name: "test",
-			SourceStorages: []model.Storage{{
-				Name: "test-source",
-			}},
-			OutputStorages: []model.Storage{{
-				Name: "test-output",
-			}},
-		}).Error
-		require.NoError(t, err)
-
-		err = MigrateSchedule(cctx)
-		require.NoError(t, err)
-
-		// Migrate again does nothing
-		err = MigrateSchedule(cctx)
-		require.NoError(t, err)
-
-		var schedules []model.Schedule
-		err = db.Find(&schedules).Error
-		require.NoError(t, err)
-		require.Len(t, schedules, 2)
-		require.EqualValues(t, 1, schedules[0].PreparationID)
-		require.Equal(t, "http://localhost:8080/{PIECE_CID}", schedules[0].URLTemplate)
-		require.Equal(t, "f0miner1", schedules[0].Provider)
-		require.Equal(t, 100, schedules[0].TotalDealNumber)
-		require.True(t, schedules[0].Verified)
-		require.True(t, schedules[0].KeepUnsealed)
-		require.True(t, schedules[0].AnnounceToIPNI)
-		require.Equal(t, time.Hour*24, schedules[0].StartDelay)
-		require.Equal(t, time.Minute/2*150000, schedules[0].Duration)
-		require.Equal(t, model.SchedulePaused, schedules[0].State)
-		require.Equal(t, 10, schedules[0].ScheduleDealNumber)
-		require.Equal(t, 10, schedules[0].MaxPendingDealNumber)
-		require.Equal(t, "notes", schedules[0].Notes)
-		require.Equal(t, "error message", schedules[0].ErrorMessage)
-	})
-}
-
-func setupMongoDBSchedule() error {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
-	defer cancel()
-	db, err := mongo.Connect(ctx, options.Client().ApplyURI(localMongoDB))
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	defer db.Disconnect(context.Background())
-	err = db.Database("singularity").Drop(ctx)
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	insertedDatasetResult, err := db.Database("singularity").Collection("scanningrequests").InsertOne(ctx, ScanningRequest{
-		Name: "test",
-	})
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	_, err = db.Database("singularity").Collection("replicationrequests").InsertMany(ctx, []any{ReplicationRequest{
-		DatasetID:           insertedDatasetResult.InsertedID.(primitive.ObjectID).Hex(),
-		MaxReplicas:         10,
-		StorageProviders:    "f0miner1,f0miner2",
-		Client:              "f0client",
-		URLPrefix:           "http://localhost:8080",
-		MaxPrice:            0,
-		MaxNumberOfDeals:    10,
-		IsVerified:          true,
-		StartDelay:          2880,
-		Duration:            150000,
-		IsOffline:           false,
-		Status:              ReplicationStatusActive,
-		CronSchedule:        "* * * * *",
-		CronMaxDeals:        100,
-		CronMaxPendingDeals: 10,
-		FileListPath:        "1.txt",
-		Notes:               "notes",
-		ErrorMessage:        "error message",
-	}})
-	if err != nil {
-		return errors.WithStack(err)
-	}
-	return nil
-}
diff --git a/migrate/types.go b/migrate/types.go
deleted file mode 100644
index 9de6c98e..00000000
--- a/migrate/types.go
+++ /dev/null
@@ -1,106 +0,0 @@
-package migrate
-
-import (
-	"time"
-
-	"go.mongodb.org/mongo-driver/bson/primitive"
-)
-
-type (
-	ScanningRequestStatus    string
-	GenerationRequestStatus  string
-	ReplicationRequestStatus string
-)
-
-const (
-	ScanningStatusActive       ScanningRequestStatus   = "active"
-	ScanningStatusCompleted    ScanningRequestStatus   = "completed"
-	ScanningStatusError        ScanningRequestStatus   = "error"
-	ScanningStatusPaused       ScanningRequestStatus   = "paused"
-	GenerationStatusActive     GenerationRequestStatus = "active"
-	GenerationStatusError      GenerationRequestStatus = "error"
-	GenerationStatusPaused     GenerationRequestStatus = "paused"
-	GenerationStatusCompleted  GenerationRequestStatus = "completed"
-	GenerationStatusCreated    GenerationRequestStatus = "created"
-	GenerationStatusDAG        GenerationRequestStatus = "dag"
-	ReplicationStatusActive    ReplicationRequestStatus = "active"
-	ReplicationStatusError     ReplicationRequestStatus = "error"
-	ReplicationStatusPaused    ReplicationRequestStatus = "paused"
-	ReplicationStatusCompleted ReplicationRequestStatus = "completed"
-)
-
-type ScanningRequest struct {
-	ID                    primitive.ObjectID    `bson:"_id,omitempty"`
-	Name                  string                `bson:"name"`
-	Path                  string                `bson:"path"`
-	OutDir                string                `bson:"outDir"`
-	MinSize               uint64                `bson:"minSize"`
-	MaxSize               uint64                `bson:"maxSize"`
-	Status                ScanningRequestStatus `bson:"status"`
-	ErrorMessage          string                `bson:"errorMessage"`
-	TmpDir                string                `bson:"tmpDir"`
-	SkipInaccessibleFiles bool                  `bson:"skipInaccessibleFiles"`
-}
-
-type GenerationRequest struct {
-	ID                    primitive.ObjectID      `bson:"_id,omitempty"`
-	DatasetID             string                  `bson:"datasetId"`
-	DatasetName           string                  `bson:"datasetName"`
-	Path                  string                  `bson:"path"`
-	OutDir                string                  `bson:"outDir"`
-	Index                 int64                   `bson:"index"`
-	Status                GenerationRequestStatus `bson:"status"`
-	ErrorMessage          string                  `bson:"errorMessage"`
-	DataCID               string                  `bson:"dataCid"`
-	CarSize               uint64                  `bson:"carSize"`
-	PieceCID              string                  `bson:"pieceCid"`
-	PieceSize             uint64                  `bson:"pieceSize"`
-	FilenameOverride      string                  `bson:"filenameOverride"`
-	TmpDir                string                  `bson:"tmpDir"`
-	SkipInaccessibleFiles bool                    `bson:"skipInaccessibleFiles"`
-	CreatedAt             time.Time               `bson:"createdAt"`
-}
-
-type OutputFileList struct {
-	ID                primitive.ObjectID `bson:"_id,omitempty"`
-	GenerationID      string             `bson:"generationId"`
-	Index             int64              `bson:"index"`
-	GeneratedFileList []GeneratedFile    `bson:"generatedFileList"`
-}
-
-type GeneratedFile struct {
-	Path  string `bson:"path"`
-	Dir   bool   `bson:"dir"`
-	CID   string `bson:"cid"`
-	Size  uint64 `bson:"size"`
-	Start uint64 `bson:"start"`
-	End   uint64 `bson:"end"`
-}
-
-func (g GeneratedFile) IsComplete() bool {
-	return g.Start == 0 && (g.End == 0 || g.End == g.Size)
-}
-
-type ReplicationRequest struct {
-	ID                  primitive.ObjectID       `bson:"_id"`
-	CreatedAt           time.Time                `bson:"createdAt"`
-	UpdatedAt           time.Time                `bson:"updatedAt"`
-	DatasetID           string                   `bson:"datasetId"`
-	MaxReplicas         int                      `bson:"maxReplicas"`      // targeted replica per piece
-	StorageProviders    string                   `bson:"storageProviders"` // comma separated SP
-	Client              string                   `bson:"client"`           // deal sent from client address
-	URLPrefix           string                   `bson:"urlPrefix"`
-	MaxPrice            float64                  `bson:"maxPrice"`         // unit in Fil
-	MaxNumberOfDeals    uint64                   `bson:"maxNumberOfDeals"` // per SP, unlimited if 0
-	IsVerified          bool                     `bson:"isVerified"`
-	StartDelay          uint64                   `bson:"startDelay"` // in epoch
-	Duration            uint64                   `bson:"duration"`   // in epoch
-	IsOffline           bool                     `bson:"isOffline"`
-	Status              ReplicationRequestStatus `bson:"status"`
-	CronSchedule        string                   `bson:"cronSchedule"`
-	CronMaxDeals        uint64                   `bson:"cronMaxDeals"`
-	CronMaxPendingDeals uint64                   `bson:"cronMaxPendingDeals"`
-	FileListPath        string                   `bson:"fileListPath"`
-	Notes               string                   `bson:"notes"`
-	ErrorMessage        string                   `bson:"errorMessage"`
-}