Skip to content

Commit 786253a

Browse files
Merge pull request #673 from guzman-raphael/migrate_method
Add DJ011 blob migration utility function, Test improvements, and minor updates
2 parents bddf71f + b85d217 commit 786253a

14 files changed

+463
-176
lines changed

LNX-docker-compose.yml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ services:
1818
- DJ_TEST_HOST=db
1919
- DJ_TEST_USER=datajoint
2020
- DJ_TEST_PASSWORD=datajoint
21-
- S3_ENDPOINT=minio:9000
21+
- S3_ENDPOINT=fakeminio.datajoint.io:9000
2222
- S3_ACCESS_KEY=datajoint
2323
- S3_SECRET_KEY=datajoint
2424
- S3_BUCKET=datajoint-test
@@ -70,5 +70,26 @@ services:
7070
timeout: 5s
7171
retries: 60
7272
interval: 1s
73+
fakeminio.datajoint.io:
74+
<<: *net
75+
image: nginx:alpine
76+
environment:
77+
- URL=datajoint.io
78+
- SUBDOMAINS=fakeminio
79+
- MINIO_SERVER=http://minio:9000
80+
entrypoint: /entrypoint.sh
81+
healthcheck:
82+
test: wget --quiet --tries=1 --spider https://fakeminio.datajoint.io:443/minio/health/live || exit 1
83+
timeout: 5s
84+
retries: 300
85+
interval: 1s
86+
# ports:
87+
# - "9000:9000"
88+
# - "443:443"
89+
volumes:
90+
- ./tests/nginx/base.conf:/base.conf
91+
- ./tests/nginx/entrypoint.sh:/entrypoint.sh
92+
- ./tests/nginx/fullchain.pem:/certs/fullchain.pem
93+
- ./tests/nginx/privkey.pem:/certs/privkey.pem
7394
networks:
7495
main:

datajoint/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,6 @@
4141
from .attribute_adapter import AttributeAdapter
4242
from . import errors
4343
from .errors import DataJointError
44+
from .migrate import migrate_dj011_external_blob_storage_to_dj012
4445

4546
ERD = Di = Diagram # Aliases for Diagram

datajoint/external.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def delete(self, *, delete_external_files=None, limit=None, display_progress=Tru
316316
raise DataJointError("The delete_external_files argument must be set to either True or False in delete()")
317317

318318
if not delete_external_files:
319-
self.unused.delete_quick()
319+
self.unused().delete_quick()
320320
else:
321321
items = self.unused().fetch_external_paths(limit=limit)
322322
if display_progress:

datajoint/fetch.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _get(connection, attr, data, squeeze, download_path):
5050
adapt = attr.adapter.get if attr.adapter else lambda x: x
5151

5252
if attr.is_filepath:
53-
return adapt(extern.download_filepath(uuid.UUID(bytes=data))[0])
53+
return str(adapt(extern.download_filepath(uuid.UUID(bytes=data))[0]))
5454

5555
if attr.is_attachment:
5656
# Steps:
@@ -65,22 +65,22 @@ def _get(connection, attr, data, squeeze, download_path):
6565
if local_filepath.is_file():
6666
attachment_checksum = _uuid if attr.is_external else hash.uuid_from_buffer(data)
6767
if attachment_checksum == hash.uuid_from_file(local_filepath, init_string=attachment_name + '\0'):
68-
return adapt(local_filepath) # checksum passed, no need to download again
68+
return str(adapt(local_filepath)) # checksum passed, no need to download again
6969
# generate the next available alias filename
7070
for n in itertools.count():
7171
f = local_filepath.parent / (local_filepath.stem + '_%04x' % n + local_filepath.suffix)
7272
if not f.is_file():
7373
local_filepath = f
7474
break
7575
if attachment_checksum == hash.uuid_from_file(f, init_string=attachment_name + '\0'):
76-
return adapt(f) # checksum passed, no need to download again
76+
return str(adapt(f)) # checksum passed, no need to download again
7777
# Save attachment
7878
if attr.is_external:
7979
extern.download_attachment(_uuid, attachment_name, local_filepath)
8080
else:
8181
# write from buffer
8282
safe_write(local_filepath, data.split(b"\0", 1)[1])
83-
return adapt(local_filepath) # download file from remote store
83+
return str(adapt(local_filepath)) # download file from remote store
8484

8585
return adapt(uuid.UUID(bytes=data) if attr.uuid else (
8686
blob.unpack(extern.get(uuid.UUID(bytes=data)) if attr.is_external else data, squeeze=squeeze)

datajoint/migrate.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
import datajoint as dj
2+
from pathlib import Path
3+
import re
4+
from .utils import user_choice
5+
6+
7+
def migrate_dj011_external_blob_storage_to_dj012(migration_schema, store):
    """
    Utility function to migrate external blob data from 0.11 to 0.12.

    The user is asked to confirm interactively; the migration only runs on an
    explicit 'yes' answer (the default answer is 'no').

    :param migration_schema: string of target schema to be migrated
    :param store: string of target dj.config['stores'] entry to be migrated
    :raises ValueError: if migration_schema is not a string
    """
    if not isinstance(migration_schema, str):
        raise ValueError(
            'Expected type {} for migration_schema, not {}.'.format(
                str, type(migration_schema)))

    prompt = (
        '\n'
        'Warning: Ensure the following are completed before proceeding.\n'
        '- Appropriate backups have been taken,\n'
        '- Any existing DJ 0.11.X connections are suspended, and\n'
        "- External config has been updated to new dj.config['stores'] structure.\n"
        'Proceed?\n')

    # Guard clause: anything other than an explicit 'yes' leaves the data untouched.
    if user_choice(prompt, default='no') != 'yes':
        print('No migration performed.')
        return

    _migrate_dj011_blob(dj.schema(migration_schema), store)
    print('Migration completed for schema: {}, store: {}.'.format(
        migration_schema, store))
33+
34+
35+
def _migrate_dj011_blob(schema, default_store):
36+
query = schema.connection.query
37+
38+
LEGACY_HASH_SIZE = 43
39+
40+
legacy_external = dj.FreeTable(
41+
schema.connection,
42+
'`{db}`.`~external`'.format(db=schema.database))
43+
44+
# get referencing tables
45+
refs = query("""
46+
SELECT concat('`', table_schema, '`.`', table_name, '`')
47+
as referencing_table, column_name, constraint_name
48+
FROM information_schema.key_column_usage
49+
WHERE referenced_table_name="{tab}" and referenced_table_schema="{db}"
50+
""".format(
51+
tab=legacy_external.table_name,
52+
db=legacy_external.database), as_dict=True).fetchall()
53+
54+
for ref in refs:
55+
# get comment
56+
column = query(
57+
'SHOW FULL COLUMNS FROM {referencing_table}'
58+
'WHERE Field="{column_name}"'.format(
59+
**ref), as_dict=True).fetchone()
60+
61+
store, comment = re.match(
62+
r':external(-(?P<store>.+))?:(?P<comment>.*)',
63+
column['Comment']).group('store', 'comment')
64+
65+
# get all the hashes from the reference
66+
hashes = {x[0] for x in query(
67+
'SELECT `{column_name}` FROM {referencing_table}'.format(
68+
**ref))}
69+
70+
# sanity check make sure that store suffixes match
71+
if store is None:
72+
assert all(len(_) == LEGACY_HASH_SIZE for _ in hashes)
73+
else:
74+
assert all(_[LEGACY_HASH_SIZE:] == store for _ in hashes)
75+
76+
# create new-style external table
77+
ext = schema.external[store or default_store]
78+
79+
# add the new-style reference field
80+
temp_suffix = 'tempsub'
81+
82+
try:
83+
query("""ALTER TABLE {referencing_table}
84+
ADD COLUMN `{column_name}_{temp_suffix}` {type} DEFAULT NULL
85+
COMMENT ":blob@{store}:{comment}"
86+
""".format(
87+
type=dj.declare.UUID_DATA_TYPE,
88+
temp_suffix=temp_suffix,
89+
store=(store or default_store), comment=comment, **ref))
90+
except:
91+
print('Column already added')
92+
pass
93+
94+
# Copy references into the new external table
95+
# No Windows! Backslashes will cause problems
96+
97+
contents_hash_function = {
98+
'file': lambda ext, relative_path: dj.hash.uuid_from_file(
99+
str(Path(ext.spec['location'], relative_path))),
100+
's3': lambda ext, relative_path: dj.hash.uuid_from_buffer(
101+
ext.s3.get(relative_path))
102+
}
103+
104+
for _hash, size in zip(*legacy_external.fetch('hash', 'size')):
105+
if _hash in hashes:
106+
relative_path = str(Path(schema.database, _hash).as_posix())
107+
uuid = dj.hash.uuid_from_buffer(init_string=relative_path)
108+
external_path = ext._make_external_filepath(relative_path)
109+
if ext.spec['protocol'] == 's3':
110+
contents_hash = dj.hash.uuid_from_buffer(ext._download_buffer(external_path))
111+
else:
112+
contents_hash = dj.hash.uuid_from_file(external_path)
113+
ext.insert1(dict(
114+
filepath=relative_path,
115+
size=size,
116+
contents_hash=contents_hash,
117+
hash=uuid
118+
), skip_duplicates=True)
119+
120+
query(
121+
'UPDATE {referencing_table} '
122+
'SET `{column_name}_{temp_suffix}`=%s '
123+
'WHERE `{column_name}` = "{_hash}"'
124+
.format(
125+
_hash=_hash,
126+
temp_suffix=temp_suffix, **ref), uuid.bytes)
127+
128+
# check that all have been copied
129+
check = query(
130+
'SELECT * FROM {referencing_table} '
131+
'WHERE `{column_name}` IS NOT NULL'
132+
' AND `{column_name}_{temp_suffix}` IS NULL'
133+
.format(temp_suffix=temp_suffix, **ref)).fetchall()
134+
135+
assert len(check) == 0, 'Some hashes havent been migrated'
136+
137+
# drop old foreign key, rename, and create new foreign key
138+
query("""
139+
ALTER TABLE {referencing_table}
140+
DROP FOREIGN KEY `{constraint_name}`,
141+
DROP COLUMN `{column_name}`,
142+
CHANGE COLUMN `{column_name}_{temp_suffix}` `{column_name}`
143+
{type} DEFAULT NULL
144+
COMMENT ":blob@{store}:{comment}",
145+
ADD FOREIGN KEY (`{column_name}`) REFERENCES {ext_table_name}
146+
(`hash`)
147+
""".format(
148+
temp_suffix=temp_suffix,
149+
ext_table_name=ext.full_table_name,
150+
type=dj.declare.UUID_DATA_TYPE,
151+
store=(store or default_store), comment=comment, **ref))
152+
153+
# Drop the old external table but make sure it's no longer referenced
154+
# get referencing tables
155+
refs = query("""
156+
SELECT concat('`', table_schema, '`.`', table_name, '`') as
157+
referencing_table, column_name, constraint_name
158+
FROM information_schema.key_column_usage
159+
WHERE referenced_table_name="{tab}" and referenced_table_schema="{db}"
160+
""".format(
161+
tab=legacy_external.table_name,
162+
db=legacy_external.database), as_dict=True).fetchall()
163+
164+
assert not refs, 'Some references still exist'
165+
166+
# drop old external table
167+
legacy_external.drop_quick()

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ pyparsing
44
ipython
55
pandas
66
tqdm
7-
networkx
7+
networkx<2.4
88
pydot
99
minio
1010
matplotlib

tests/nginx/base.conf

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
server {
2+
listen 9000;
3+
server_name {{SUBDOMAINS}}.{{URL}};
4+
client_max_body_size 0;
5+
proxy_buffering off;
6+
ignore_invalid_headers off;
7+
8+
location / {
9+
access_log off;
10+
proxy_http_version 1.1;
11+
proxy_set_header Host $http_host;
12+
proxy_pass {{MINIO_SERVER}};
13+
}
14+
}
15+
16+
server {
17+
listen 443 ssl;
18+
server_name {{SUBDOMAINS}}.{{URL}};
19+
client_max_body_size 0;
20+
proxy_buffering off;
21+
ignore_invalid_headers off;
22+
23+
ssl_certificate /certs/fullchain.pem;
24+
ssl_certificate_key /certs/privkey.pem;
25+
26+
# session settings
27+
ssl_session_timeout 1d;
28+
ssl_session_cache shared:SSL:50m;
29+
ssl_session_tickets off;
30+
31+
# protocols
32+
ssl_protocols TLSv1.2 TLSv1.3;
33+
ssl_prefer_server_ciphers on;
34+
ssl_ciphers 'ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256';
35+
36+
# OCSP Stapling
37+
ssl_stapling on;
38+
ssl_stapling_verify on;
39+
resolver 127.0.0.11 valid=30s; # Docker DNS Server
40+
41+
location / {
42+
access_log off;
43+
proxy_http_version 1.1;
44+
proxy_set_header Host $http_host;
45+
proxy_pass {{MINIO_SERVER}};
46+
}
47+
}

tests/nginx/entrypoint.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#! /bin/sh
# Render the nginx config template with the container's environment and run
# nginx in the foreground.
#
# Expects SUBDOMAINS, URL and MINIO_SERVER in the environment; they replace
# the {{SUBDOMAINS}}, {{URL}} and {{MINIO_SERVER}} placeholders of /base.conf.

# Stop instead of starting nginx with a missing or half-rendered config.
set -e

# One sed pass applies the three substitutions in the original order; tee
# writes the rendered config and echoes it to the container log.
sed -e "s|{{SUBDOMAINS}}|${SUBDOMAINS}|g" \
    -e "s|{{URL}}|${URL}|g" \
    -e "s|{{MINIO_SERVER}}|${MINIO_SERVER}|g" \
    /base.conf | tee /etc/nginx/conf.d/base.conf

# exec replaces the shell so nginx receives the container's stop signals directly.
exec nginx -g "daemon off;"

tests/nginx/fullchain.pem

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
-----BEGIN CERTIFICATE-----
2+
MIIGZDCCBUygAwIBAgISA10k5JmyN2nyzqvMRyO2LntJMA0GCSqGSIb3DQEBCwUA
3+
MEoxCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1MZXQncyBFbmNyeXB0MSMwIQYDVQQD
4+
ExpMZXQncyBFbmNyeXB0IEF1dGhvcml0eSBYMzAeFw0xOTEwMjEyMTAwMjFaFw0y
5+
MDAxMTkyMTAwMjFaMCExHzAdBgNVBAMTFmZha2VtaW5pby5kYXRham9pbnQuaW8w
6+
ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDYwRR1YoZ1pJgZO+oU6zla
7+
47UiTnMO2KwYaS3VAPa1ks9nmQBInH7nA3i3wIzCiX+CeKCsiaKsjA5xlQmtmeM4
8+
FrL5U1ZBUyhroowRSluVyni9g3uJO/NG29BiWU+NBcwtsApbVUXRp4v9BQ2KgRZS
9+
KhK74XLXu1/6NRl3sjzye6MkfTo1rkzmm+pnFvBDkPEdI8/R7mBTQFXTSXzrqo+5
10+
ZNBY3sYWpGVrLOi+LRvFJR6kNs1z1cxOYGXQRMBFNMyu4xZAYDaowR+HVQ0OsQYw
11+
90PeuakMyB5qeIPe1zelfqP+/HO6L9MTZdLKBNm5YkJYrVm2YD5BcVCDJeUkAact
12+
DKW/AX2FL983D0WepM4mPm1MuRqpYVSEne3eeA4/Gm8TVpmqQyuW10HJbCsgZR9g
13+
X/gokz54uguHu7SZHvLuadoWzxMADFSvbOoM52rFgCsKeZecNDi9H54yAHlhjIj7
14+
Fs9zVRkELil5U2Fnolw8gOyfV/2ghqor8Y4950fcuy9DldcKeCmpjjGoemff/REL
15+
p4tgib+XAX/3bVmfgW4aTW1RoQ+duThfPovzumPXxffXNrRlstX7IaR/Asz0bhSJ
16+
C91vmemedgyExcUSuyqX2qzJrgdx5TCBpP+J47b5oHdjS9uWeg5BX7JpofiR/klP
17+
5OADP/F2a68aWgox7Z+CRQIDAQABo4ICazCCAmcwDgYDVR0PAQH/BAQDAgWgMB0G
18+
A1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAMBgNVHRMBAf8EAjAAMB0GA1Ud
19+
DgQWBBSZJjFCP4IjST+FfsC54OKupBDWYTAfBgNVHSMEGDAWgBSoSmpjBH3duubR
20+
ObemRWXv86jsoTBvBggrBgEFBQcBAQRjMGEwLgYIKwYBBQUHMAGGImh0dHA6Ly9v
21+
Y3NwLmludC14My5sZXRzZW5jcnlwdC5vcmcwLwYIKwYBBQUHMAKGI2h0dHA6Ly9j
22+
ZXJ0LmludC14My5sZXRzZW5jcnlwdC5vcmcvMCEGA1UdEQQaMBiCFmZha2VtaW5p
23+
by5kYXRham9pbnQuaW8wTAYDVR0gBEUwQzAIBgZngQwBAgEwNwYLKwYBBAGC3xMB
24+
AQEwKDAmBggrBgEFBQcCARYaaHR0cDovL2Nwcy5sZXRzZW5jcnlwdC5vcmcwggEE
25+
BgorBgEEAdZ5AgQCBIH1BIHyAPAAdwCyHgXMi6LNiiBOh2b5K7mKJSBna9r6cOey
26+
SVMt74uQXgAAAW3wVdvZAAAEAwBIMEYCIQDeOafRS+nnooUWcFQlH82sK2lTrR3N
27+
uJqKJLIeoJdJVwIhAI+tVJJ103wbH6bC/ZwuRDlB/Omya0QwwO4m4Af4u/SEAHUA
28+
8JWkWfIA0YJAEC0vk4iOrUv+HUfjmeHQNKawqKqOsnMAAAFt8FXd3gAABAMARjBE
29+
AiAD8IITk6e1Ms01r2SUBUwaIwAA5z6NqYK8YBudhHRU6gIgBAzTx3OLwKo7aOjY
30+
8rf03Mcttz72VDI1dIDPt9vXxEcwDQYJKoZIhvcNAQELBQADggEBAFeAxIE70OgD
31+
1hx34hdJzfSOPUm3bjReUdfif6LTNYhEK1KjEKDwNK7r978t3wcOuxuJAwBdClzE
32+
dE/7EfuZilXWjVJ2La4J0DdQcrjt+O4bvFghNTWsOoYl5X0LzgKZLbl/9hvK8cE3
33+
/d3Pjf0zHflT0pJYjLP89ntwKJdFsAjFQc1+kX85SehYIj9c7t/W5/9MDhtebtvj
34+
Os1inUb4l15jbGTO3po8tPmmHLAvfTM6d/KIGueLHAn63EzCg1tmnQUjhhM1Zyzl
35+
Djdshrw0nr6BFOJvw/h/DYo6MqtLuTlrVjfdULjkqH5wq2wh7gqnGcQbqcI8Eixd
36+
SQbaP7xJreA=
37+
-----END CERTIFICATE-----
38+
-----BEGIN CERTIFICATE-----
39+
MIIEkjCCA3qgAwIBAgIQCgFBQgAAAVOFc2oLheynCDANBgkqhkiG9w0BAQsFADA/
40+
MSQwIgYDVQQKExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMT
41+
DkRTVCBSb290IENBIFgzMB4XDTE2MDMxNzE2NDA0NloXDTIxMDMxNzE2NDA0Nlow
42+
SjELMAkGA1UEBhMCVVMxFjAUBgNVBAoTDUxldCdzIEVuY3J5cHQxIzAhBgNVBAMT
43+
GkxldCdzIEVuY3J5cHQgQXV0aG9yaXR5IFgzMIIBIjANBgkqhkiG9w0BAQEFAAOC
44+
AQ8AMIIBCgKCAQEAnNMM8FrlLke3cl03g7NoYzDq1zUmGSXhvb418XCSL7e4S0EF
45+
q6meNQhY7LEqxGiHC6PjdeTm86dicbp5gWAf15Gan/PQeGdxyGkOlZHP/uaZ6WA8
46+
SMx+yk13EiSdRxta67nsHjcAHJyse6cF6s5K671B5TaYucv9bTyWaN8jKkKQDIZ0
47+
Z8h/pZq4UmEUEz9l6YKHy9v6Dlb2honzhT+Xhq+w3Brvaw2VFn3EK6BlspkENnWA
48+
a6xK8xuQSXgvopZPKiAlKQTGdMDQMc2PMTiVFrqoM7hD8bEfwzB/onkxEz0tNvjj
49+
/PIzark5McWvxI0NHWQWM6r6hCm21AvA2H3DkwIDAQABo4IBfTCCAXkwEgYDVR0T
50+
AQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAYYwfwYIKwYBBQUHAQEEczBxMDIG
51+
CCsGAQUFBzABhiZodHRwOi8vaXNyZy50cnVzdGlkLm9jc3AuaWRlbnRydXN0LmNv
52+
bTA7BggrBgEFBQcwAoYvaHR0cDovL2FwcHMuaWRlbnRydXN0LmNvbS9yb290cy9k
53+
c3Ryb290Y2F4My5wN2MwHwYDVR0jBBgwFoAUxKexpHsscfrb4UuQdf/EFWCFiRAw
54+
VAYDVR0gBE0wSzAIBgZngQwBAgEwPwYLKwYBBAGC3xMBAQEwMDAuBggrBgEFBQcC
55+
ARYiaHR0cDovL2Nwcy5yb290LXgxLmxldHNlbmNyeXB0Lm9yZzA8BgNVHR8ENTAz
56+
MDGgL6AthitodHRwOi8vY3JsLmlkZW50cnVzdC5jb20vRFNUUk9PVENBWDNDUkwu
57+
Y3JsMB0GA1UdDgQWBBSoSmpjBH3duubRObemRWXv86jsoTANBgkqhkiG9w0BAQsF
58+
AAOCAQEA3TPXEfNjWDjdGBX7CVW+dla5cEilaUcne8IkCJLxWh9KEik3JHRRHGJo
59+
uM2VcGfl96S8TihRzZvoroed6ti6WqEBmtzw3Wodatg+VyOeph4EYpr/1wXKtx8/
60+
wApIvJSwtmVi4MFU5aMqrSDE6ea73Mj2tcMyo5jMd6jmeWUHK8so/joWUoHOUgwu
61+
X4Po1QYz+3dszkDqMp4fklxBwXRsW10KXzPMTZ+sOPAveyxindmjkW8lGy+QsRlG
62+
PfZ+G6Z6h7mjem0Y+iWlkYcV4PIWL1iwBi8saCbGS5jN2p8M+X+Q7UNKEkROb3N6
63+
KOqkqm57TH2H3eDJAkSnh6/DNFu0Qg==
64+
-----END CERTIFICATE-----

0 commit comments

Comments
 (0)