3737from invenio_pidstore .models import PersistentIdentifier
3838from invenio_records_files .api import Record
3939from invenio_records_files .models import RecordsBuckets
40+ from invenio_sipstore .models import SIPMetadataType
4041from sqlalchemy .orm .attributes import flag_modified
4142
4243from cernopendata .modules .records .minters .docid import \
4344 cernopendata_docid_minter
4445from cernopendata .modules .records .minters .recid import \
4546 cernopendata_recid_minter
4647
48+ from .sip_utils import (
49+ handle_sipstore_record_file_index ,
50+ handle_sipstore_record_file ,
51+ sip_record ,
52+ )
4753
4854def get_jsons_from_dir (dir ):
4955 """Get JSON files inside a dir."""
@@ -55,8 +61,9 @@ def get_jsons_from_dir(dir):
5561 return res
5662
5763
58- def handle_record_files (data , bucket , files , skip_files ):
64+ def handle_record_files (data , bucket , files , skip_files , skip_sips ):
5965 """Handles record files."""
66+ sip_files = []
6067 for file in files :
6168 if skip_files :
6269 break
@@ -89,45 +96,80 @@ def handle_record_files(data, bucket, files, skip_files):
8996 str (e )))
9097 continue
9198
99+ if not skip_sips :
100+ if file .get ("type" , None ) == "index.json" :
101+ sip_files += handle_sipstore_record_file_index (f )
92102
93- def create_record (schema , data , files , skip_files ):
103+ return sip_files
104+
105+
106+ def handle_sip_files (files , skip_files , skip_sips ):
107+ """Handles record files."""
108+ sip_files = []
109+ for file in files :
110+ if skip_files :
111+ break
112+ assert 'uri' in file
113+ assert 'size' in file
114+ assert 'checksum' in file
115+ f = FileInstance .get_by_uri (file .get ("uri" ))
116+
117+ if f and not skip_sips :
118+ if file .get ("type" , None ) == "index.json" :
119+ sip_files += handle_sipstore_record_file_index (f )
120+
121+ return sip_files
122+
123+
124+ def create_record (schema , data , files , skip_files , skip_sips ):
94125 """Creates a new record."""
95126 id = uuid .uuid4 ()
96- cernopendata_recid_minter (id , data )
127+ pid = cernopendata_recid_minter (id , data )
128+
97129 data ['$schema' ] = schema
98130 record = Record .create (data , id_ = id )
99131 if not skip_files :
100132 bucket = Bucket .create ()
101- handle_record_files (data , bucket , files , skip_files )
133+ sip_files_content = handle_record_files (
134+ data , bucket , files , skip_files , skip_sips )
135+
102136 RecordsBuckets .create (
103137 record = record .model , bucket = bucket )
104138
105- return record
139+ return pid , record , sip_files_content
106140
107141
108- def update_record (pid , schema , data , files , skip_files ):
142+ def update_record (pid , schema , data , files , skip_files , skip_sips ):
109143 """Updates the given record."""
110144 record = Record .get_record (pid .object_uuid )
111- with db .session .begin_nested ():
112- if record .files and not skip_files :
113- bucket_id = record .files .bucket
114- bucket = Bucket .get (bucket_id .id )
115- for o in ObjectVersion .get_by_bucket (bucket ).all ():
116- o .remove ()
117- o .file .delete ()
118- RecordsBuckets .query .filter_by (
119- record = record .model ,
120- bucket = bucket
121- ).delete ()
122- bucket_id .remove ()
123- db .session .commit ()
145+ # with db.session.begin_nested():
146+ # if record.files and not skip_files:
147+ # bucket_id = record.files.bucket
148+ # bucket = Bucket.get(bucket_id.id)
149+ # for o in ObjectVersion.get_by_bucket(bucket).all():
150+ # o.remove()
151+ # o.file.delete()
152+ # RecordsBuckets.query.filter_by(
153+ # record=record.model,
154+ # bucket=bucket
155+ # ).delete()
156+ # bucket_id.remove()
157+ # db.session.commit()
158+
124159 record .update (data )
160+ sip_files_content = []
125161 if not skip_files :
126- bucket = Bucket .create ()
127- handle_record_files (data , bucket , files , skip_files )
128- RecordsBuckets .create (
129- record = record .model , bucket = bucket )
130- return record
162+ sip_files_content = handle_sip_files (
163+ files ,
164+ skip_files ,
165+ skip_sips
166+ )
167+ # bucket = Bucket.create()
168+ # sip_files_content = handle_record_files(
169+ # data, bucket, files, skip_files, skip_sips)
170+ # RecordsBuckets.create(
171+ # record=record.model, bucket=bucket)
172+ return record , sip_files_content
131173
132174
133175def create_doc (data , schema ):
@@ -156,6 +198,8 @@ def fixtures():
156198@fixtures .command ()
157199@click .option ('--skip-files' , is_flag = True , default = False ,
158200 help = 'Skip loading of files' )
201+ @click .option ('--skip-sips' , is_flag = True , default = False ,
202+ help = 'Skip create/update of SIPs' )
159203@click .option ('files' , '--file' , '-f' , multiple = True ,
160204 type = click .Path (exists = True ),
161205 help = 'Path to the file(s) to be loaded. If not provided, all'
@@ -165,8 +209,9 @@ def fixtures():
165209@click .option ('--mode' , required = True , type = click .Choice (
166210 ['insert' , 'replace' , 'insert-or-replace' ]))
167211@with_appcontext
168- def records (skip_files , files , profile , mode ):
212+ def records (skip_files , skip_sips , files , profile , mode ):
169213 """Load all records."""
214+
170215 if profile :
171216 import cProfile
172217 import pstats
@@ -187,31 +232,34 @@ def records(skip_files, files, profile, mode):
187232 else :
188233 record_json = glob .glob (os .path .join (data , '*.json' ))
189234
235+
190236 for filename in record_json :
191237 # name = filename.split('/')[-1]
192238 # if name.startswith('opera'):
193239 # click.echo('Skipping opera records ...')
194240 # continue
241+
195242 click .echo ('Loading records from {0} ...' .format (filename ))
196243 with open (filename , 'rb' ) as source :
197244 for data in json .load (source ):
198-
199245 if not data :
200246 click .echo ('IGNORING a possibly broken or corrupted '
201247 'record entry in file {0} ...' .format (filename ))
202248 continue
203249
204250 files = data .get ('files' , [])
205251
252+ pid = None
206253 if mode == 'insert-or-replace' :
207254 try :
208255 pid = PersistentIdentifier .get ('recid' , data ['recid' ])
209256 if pid :
210- record = update_record (
211- pid , schema , data , files , skip_files )
257+ record , sip_files_content = update_record (
258+ pid , schema , data , files , skip_files , skip_sips )
212259 action = 'updated'
213260 except PIDDoesNotExistError :
214- record = create_record (schema , data , files , skip_files )
261+ pid , record , sip_files_content = create_record (
262+ schema , data , files , skip_files , skip_sips )
215263 action = 'inserted'
216264 elif mode == 'insert' :
217265 try :
@@ -223,7 +271,8 @@ def records(skip_files, files, profile, mode):
223271 data .get ('recid' )), err = True )
224272 return
225273 except PIDDoesNotExistError :
226- record = create_record (schema , data , files , skip_files )
274+ pid , record , sip_files_content = create_record (
275+ schema , data , files , skip_files , skip_sips )
227276 action = 'inserted'
228277 else :
229278 try :
@@ -234,13 +283,20 @@ def records(skip_files, files, profile, mode):
234283 'cannot replace it.' .format (
235284 data .get ('recid' )), err = True )
236285 return
237- record = update_record (
238- pid , schema , data , files , skip_files )
286+ record , sip_files_content = update_record (
287+ pid , schema , data , files , skip_files , skip_sips )
239288 action = 'updated'
240289
290+
291+
241292 if not skip_files :
242293 record .files .flush ()
243294 record .commit ()
295+
296+ if not skip_sips :
297+ sip_record (pid , record , sip_files_content , action )
298+ # sip_record(pid, record, ''.join(sip_files_content), action)
299+
244300 db .session .commit ()
245301 click .echo (
246302 'Record recid {0} {1}.' .format (
@@ -462,3 +518,49 @@ def pids():
462518 db .session .add (record )
463519 db .session .commit ()
464520 db .session .expunge_all ()
521+
522+
523+
524+ @fixtures .command ()
525+ @with_appcontext
526+ def sipmetadata ():
527+ """Load sipmetadata types."""
528+ data = [
529+ {
530+ "title" : "CERN Open Data Record JSON" ,
531+ "name" : "record-json" ,
532+ "format" : "json" ,
533+ "schema" : current_app .extensions ['invenio-jsonschemas' ] \
534+ .path_to_url ('records/record-v1.0.0.json' )
535+ },
536+ {
537+ "title" : "CERN Open Data Docs JSON" ,
538+ "name" : "docs-json" ,
539+ "format" : "json" ,
540+ "schema" : current_app .extensions ['invenio-jsonschemas' ] \
541+ .path_to_url ('records/docs-v1.0.0.json' )
542+ },
543+ {
544+ "title" : "CERN Open Data Glossary JSON" ,
545+ "name" : "glossary-json" ,
546+ "format" : "json" ,
547+ "schema" : current_app .extensions ['invenio-jsonschemas' ] \
548+ .path_to_url ('records/glossary-term-v1.0.0.json' )
549+ },
550+ {
551+ "title" : "BagIt Archiver metadata" ,
552+ "name" : "bagit" ,
553+ "format" : "json" ,
554+ "schema" : current_app .extensions ['invenio-jsonschemas' ] \
555+ .path_to_url ('sipstore/bagit-v1.0.0.json' )
556+ }
557+ ]
558+
559+ click .secho ('Loading SIP metadata types...' , fg = 'blue' )
560+ with click .progressbar (data ) as types :
561+ with db .session .begin_nested ():
562+ for type in types :
563+ db .session .add (SIPMetadataType (** type ))
564+ db .session .commit ()
565+ click .secho ('SIP metadata types loaded!' , fg = 'green' )
566+
0 commit comments