
Commit 19b0f02

running pipenv run black
1 parent 0a8e764 commit 19b0f02

6 files changed: +269 -134 lines changed
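
The commit message indicates a pure formatting pass: in the hunks below, black normalizes single-quoted strings to double quotes, inserts a second blank line before top-level definitions, and wraps calls that overflow the line length. As a minimal, runnable sketch of the quote normalization alone (the wrapping style is visible in the hunks themselves):

    # Before black: single-quoted string, as in the v1 scripts.
    print('we got some metadata')

    # After black: quotes normalized to double quotes, behavior unchanged.
    print("we got some metadata")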

scripts/migration/dataset_collection_json.py

Lines changed: 13 additions & 7 deletions
@@ -9,7 +9,7 @@
 except ImportError:
     import tomli as tomllib
 
-path_to_env = os.path.join(os.getcwd(),"scripts","migration", ".env")
+path_to_env = os.path.join(os.getcwd(), "scripts", "migration", ".env")
 config = dotenv_values(dotenv_path=path_to_env)
 
 CLOWDER_V1 = config["CLOWDER_V1"]
@@ -32,23 +32,26 @@
 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 COLLECTIONS_FILE = "collections_datasets.json"
 
+
 def get_all_datasets(header):
     endpoint = f"{CLOWDER_V1}/api/datasets?superAdmin=true&limit=0"
     datasets = requests.get(endpoint, headers=header).json()
     return datasets
 
+
 def get_dataset_metadata(dataset_v1_id, headers_v1):
     # Get metadata from Clowder V1
     endpoint = (
         f"{CLOWDER_V1}/api/datasets/{dataset_v1_id}/metadata.jsonld?superAdmin=true"
     )
     metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
     if len(metadata_v1) > 0:
-        print('we got some metadata')
-        with open('datasets_with_metadata.txt', 'a') as f:
-            f.write(dataset_v1_id + '\n')
+        print("we got some metadata")
+        with open("datasets_with_metadata.txt", "a") as f:
+            f.write(dataset_v1_id + "\n")
     return metadata_v1
 
+
 def get_dataset_collections_map():
     print("Getting collections and datasets from Clowder v1...")
 
@@ -67,19 +70,22 @@ def get_dataset_collections_map():
         dataset_to_collection[dataset] = current_value
     return dataset_to_collection
 
+
 def get_datasets_in_collections():
     map = get_dataset_collections_map()
     datasets_in_collections = list(map.keys())
     datasets_with_metadata = []
     for i in range(0, len(datasets_in_collections)):
         current_dataset = datasets_in_collections[i]
-        dataset_metadata = get_dataset_metadata(current_dataset, base_headers_v1, datasets_with_metadata)
+        dataset_metadata = get_dataset_metadata(
+            current_dataset, base_headers_v1, datasets_with_metadata
+        )
     return datasets_in_collections
 
+
 if __name__ == "__main__":
     all_datasets = get_all_datasets(base_headers_v1)
     for i in range(0, len(all_datasets)):
         current_dataset = all_datasets[i]
-        get_dataset_metadata(current_dataset['id'], base_headers_v1)
+        get_dataset_metadata(current_dataset["id"], base_headers_v1)
     get_datasets_in_collections()
-
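
One thing this formatting pass does not change: get_datasets_in_collections calls get_dataset_metadata with three arguments, while the definition above takes only (dataset_v1_id, headers_v1), so that call would raise a TypeError at runtime. A minimal sketch of a signature that would accept the tracking list the caller passes; the third parameter and the base URL here are assumptions, not part of this commit:

    import requests

    CLOWDER_V1 = "https://clowder-v1.example.edu"  # placeholder; the script loads this from .env


    def get_dataset_metadata(dataset_v1_id, headers_v1, datasets_with_metadata=None):
        # Get metadata from Clowder V1, optionally recording which datasets have any.
        endpoint = (
            f"{CLOWDER_V1}/api/datasets/{dataset_v1_id}/metadata.jsonld?superAdmin=true"
        )
        metadata_v1 = requests.get(endpoint, headers=headers_v1).json()
        if len(metadata_v1) > 0 and datasets_with_metadata is not None:
            # Hypothetical: track which datasets carry metadata in the caller's list.
            datasets_with_metadata.append(dataset_v1_id)
        return metadata_v1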

scripts/migration/get_collections.py

Lines changed: 18 additions & 7 deletions
@@ -24,7 +24,7 @@
 OUTPUT_FILE = "collections_ids.txt"
 
 # Load environment variables
-path_to_env = os.path.join(os.getcwd(),"scripts","migration", ".env")
+path_to_env = os.path.join(os.getcwd(), "scripts", "migration", ".env")
 config = dotenv_values(dotenv_path=path_to_env)
 
 
@@ -60,12 +60,14 @@
     "last_name": "admin",
 }
 
+
 def get_clowder_v1_top_level_collections(headers):
     endpoint = f"{CLOWDER_V1}/api/collections/topLevelCollections?superAdmin=true"
     response = requests.get(endpoint, headers=headers)
     user_collections = response.json()
     return user_collections
 
+
 def get_collection_v1_descendants(headers, collection_id):
     descendant_ids = []
 
@@ -75,17 +77,20 @@ def get_collection_v1_descendants(headers, collection_id):
     print(collection_json["child_collection_ids"])
     if int(collection_json["childCollectionsCount"]) > 0:
         child_collections_ids = collection_json["child_collection_ids"]
-        descendant_ids = child_collections_ids[5:-1].split(', ')
+        descendant_ids = child_collections_ids[5:-1].split(", ")
         for i in range(0, len(descendant_ids)):
             id = descendant_ids[i]
             descendent_endpoint = f"{CLOWDER_V1}/api/collections/{id}"
-            descendent_response = requests.get(descendent_endpoint, headers=headers, verify=False)
+            descendent_response = requests.get(
+                descendent_endpoint, headers=headers, verify=False
+            )
             descendent_json = descendent_response.json()
             if int(descendent_json["childCollectionsCount"]) > 0:
                 sub_descendants = get_collection_v1_descendants(headers, id)
                 descendant_ids.extend(sub_descendants)
     return descendant_ids
 
+
 def get_dataset_ids_in_v1_collection(headers, collection_id):
     dataset_ids = []
     collection_endpoint = f"{CLOWDER_V1}/api/collections/{collection_id}/datasets"
@@ -95,17 +100,23 @@ def get_dataset_ids_in_v1_collection(headers, collection_id):
         dataset_ids.append(dataset["id"])
     return dataset_ids
 
+
 if __name__ == "__main__":
     top_level_collections = get_clowder_v1_top_level_collections(clowder_headers_v1)
     all_v1_collections = []
     for collection in top_level_collections:
-        print(f"Getting descendents for collection {collection['name']} ({collection['id']})")
+        print(
+            f"Getting descendents for collection {collection['name']} ({collection['id']})"
+        )
         all_v1_collections.append(collection["id"])
         if int(collection["childCollectionsCount"]) > 0:
-            descendant_ids = get_collection_v1_descendants(clowder_headers_v1, collection["id"])
+            descendant_ids = get_collection_v1_descendants(
+                clowder_headers_v1, collection["id"]
+            )
             all_v1_collections.extend(descendant_ids)
-            print(f"Added descendents for collection {collection['name']} ({collection['id']})")
-
+            print(
+                f"Added descendents for collection {collection['name']} ({collection['id']})"
+            )
 
     print(f"TOTAL V1 COLLECTIONS TO MIGRATE: {len(all_v1_collections)}")
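
For context on the [5:-1] slice that black reformats in get_collection_v1_descendants: the v1 API appears to serialize child_collection_ids as a Scala-style string such as "List(id1, id2)", which is an assumption based on the slicing, not something this diff confirms. Dropping the first five characters ("List(") and the last (")") leaves the comma-separated ids. A worked example under that assumption, with hypothetical ids:

    # Hypothetical v1 payload value; "List(" is 5 characters, ")" is the last.
    child_collection_ids = "List(5d1f2ab4e5, 5d1f2ab4e6)"
    descendant_ids = child_collection_ids[5:-1].split(", ")
    print(descendant_ids)  # ['5d1f2ab4e5', '5d1f2ab4e6']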

scripts/migration/get_collections_datasets.py

Lines changed: 7 additions & 8 deletions
@@ -10,17 +10,15 @@
     import tomli as tomllib
 
 
-
 DEFAULT_PASSWORD = "Password123&"
 
 # Get the current timestamp
 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 COLLECTIONS_FILE = "collections_ids.txt"
 
 
-
 # Load environment variables
-path_to_env = os.path.join(os.getcwd(),"scripts","migration", ".env")
+path_to_env = os.path.join(os.getcwd(), "scripts", "migration", ".env")
 config = dotenv_values(dotenv_path=path_to_env)
 
 
@@ -56,6 +54,7 @@
     "last_name": "admin",
 }
 
+
 def get_collections_datasets(headers, collection_id):
     collection_dataset_endpoint = (
         f"{CLOWDER_V1}/api/collections/{collection_id}/datasets?superAdmin=true"
@@ -70,16 +69,16 @@ def get_collections_datasets(headers, collection_id):
 if __name__ == "__main__":
     print("Getting collections and datasets from Clowder v1...")
 
-    collection_ids =[]
+    collection_ids = []
     if os.path.exists(COLLECTIONS_FILE):
-        print('exists')
+        print("exists")
     else:
-        print('does not exist')
+        print("does not exist")
 
     with open(COLLECTIONS_FILE, "r") as outfile:
         lines = outfile.readlines()
         for line in lines:
-            collection_ids.append(line.rstrip('\n'))
+            collection_ids.append(line.rstrip("\n"))
     print(f"Found {len(collection_ids)} collections in {COLLECTIONS_FILE}")
     collection_dataset_dict = dict()
     for id in collection_ids:
@@ -94,4 +93,4 @@ def get_collections_datasets(headers, collection_id):
     json_file = "collections_datasets.json"
     with open(json_file, "w") as jf:
         json.dump(collection_dataset_dict, jf)
-    print("dumped to a file")
\ No newline at end of file
+    print("dumped to a file")
