22import json
33import requests
44import re
5- import rq
65from logging import getLogger
76
87from typing import Any , Optional , Dict , cast , List
98from ckan .types import Context , DataDict
109
11- from ckan .plugins .toolkit import _ , config , get_action
12- from ckan .lib .redis import connect_to_redis
10+ from ckan .plugins .toolkit import _ , config , get_action , enqueue_job
11+ from ckan .lib .jobs import add_queue_name_prefix
1312
1413from ckanext .datastore .logic .action import datastore_search_sql
1514from ckanext .datastore .backend .postgres import identifier
2221
2322MAX_ERR_LEN = 1000
2423PSQL_TO_SOLR_WILCARD_MATCH = re .compile ('^_?|_?$' )
25- REDIS_QUEUE_NAME = 'ckan_ds_solr_core_create'
2624
2725log = getLogger (__name__ )
2826DEBUG = config .get ('debug' , False )
2927
30- _ds_solr_queues : Dict [str , rq .Queue ] = {}
31-
3228
3329class DatastoreSolrBackend (DatastoreSearchBackend ):
3430 """
3531 SOLR class for datastore search backend.
3632 """
3733 timeout = config .get ('solr_timeout' )
3834 default_search_fields = ['_id' , '_version_' , 'indexed_ts' , '_text_' ]
35+ configset_name = config .get ('ckanext.datastore_search.solr.configset' ,
36+ 'datastore_resource' )
3937
4038 @property
4139 def field_type_map (self ):
@@ -95,30 +93,6 @@ def _make_connection(self, resource_id: str) -> Optional[pysolr.Solr]:
9593 except pysolr .SolrError :
9694 pass
9795
98- def _make_or_create_connection (self , resource_id : str ) -> Optional [pysolr .Solr ]:
99- """
100- Tries to make a SOLR connection to a core,
101- otherwise tries to creates a new core.
102- """
103- core_name = f'{ self .prefix } { resource_id } '
104- conn = self ._make_connection (resource_id )
105- if conn :
106- return conn
107- ds_result = get_action ('datastore_search' )(
108- self ._get_site_context (), {'resource_id' : resource_id ,
109- 'limit' : 0 ,
110- 'skip_search_engine' : True })
111- create_dict = {
112- 'resource_id' : resource_id ,
113- 'fields' : [f for f in ds_result ['fields' ] if
114- f ['id' ] not in self .default_search_fields ]}
115- self .create (self ._get_site_context (), create_dict )
116- conn = self ._make_connection (resource_id )
117- if conn :
118- return conn
119- raise DatastoreSearchException (
120- _ ('Could not connect to SOLR core %s' ) % core_name )
121-
12296 def _send_api_request (self ,
12397 method : str ,
12498 endpoint : str ,
@@ -158,7 +132,10 @@ def reindex(self,
158132 # DS Resource could take a long time??
159133 context = self ._get_site_context ()
160134 core_name = f'{ self .prefix } { resource_id } '
161- conn = self ._make_or_create_connection (resource_id ) if not connection else connection
135+ conn = self ._make_connection (resource_id ) if not connection else connection
136+
137+ if not conn :
138+ return
162139
163140 errmsg = _ ('Could not reload SOLR core %s' ) % core_name
164141 resp = self ._send_api_request (method = 'POST' ,
@@ -189,9 +166,9 @@ def reindex(self,
189166 indexed_ids = []
190167 while gathering_solr_records :
191168 solr_records = self .search (
192- context , {'resource_id' : resource_id ,
193- 'limit' : 1000 ,
194- 'offset' : offset },
169+ {'resource_id' : resource_id ,
170+ 'limit' : 1000 ,
171+ 'offset' : offset },
195172 conn )
196173 if not solr_records :
197174 gathering_solr_records = False
@@ -266,7 +243,6 @@ def _check_counts(self,
266243 self .reindex (resource_id , connection , only_missing = True )
267244
268245 def create (self ,
269- context : Context ,
270246 data_dict : DataDict ,
271247 connection : Optional [pysolr .Solr ] = None ) -> Any :
272248 """
@@ -276,35 +252,24 @@ def create(self,
276252 core_name = f'{ self .prefix } { resource_id } '
277253 conn = self ._make_connection (resource_id ) if not connection else connection
278254 if not conn :
279- # FIXME: using configSet in API does not copy the configSet
280- # into the core conf directory. We need to send some type
281- # of signal to the SOLR server so it can run
282- # solr create -c core_name -d configsets/datastore_resource
283- # then does SOLR server need to send a signal back?
284- # or can we just keep retrying a couple of times??
285- global _ds_solr_queues
286- redis_queue = None
287255 errmsg = _ ('Could not create SOLR core %s' ) % core_name
288- if REDIS_QUEUE_NAME in _ds_solr_queues :
289- redis_queue = _ds_solr_queues [REDIS_QUEUE_NAME ]
290- else :
291- redis_conn = connect_to_redis ()
292- redis_queue = _ds_solr_queues [REDIS_QUEUE_NAME ] = \
293- rq .Queue (REDIS_QUEUE_NAME , connection = redis_conn )
294- if not redis_queue :
295- raise DatastoreSearchException (errmsg )
296- job = redis_queue .enqueue_call (
297- 'create_solr_core.proc._create_solr_core' ,
298- args = [core_name , 'datastore_resource' ],
299- timeout = 60 )
300- if not job .meta :
301- job .meta = {}
302- job .meta ['title' ] = 'SOLR Core creation %s' % core_name
303- job .save ()
256+ callback_queue = add_queue_name_prefix (self .redis_callback_queue_name )
257+ enqueue_job (fn = 'solr_utils.create_solr_core.proc.create_solr_core' ,
258+ kwargs = {
259+ 'core_name' : core_name ,
260+ 'config_set' : self .configset_name ,
261+ 'callback_fn' : 'ckanext.datastore_search.logic.'
262+ 'action.datastore_search_create_callback' ,
263+ 'callback_queue' : callback_queue ,
264+ 'callback_timeout' : config .get ('ckan.jobs.timeout' , 300 )},
265+ title = 'SOLR Core creation %s' % core_name ,
266+ queue = self .redis_queue_name ,
267+ rq_kwargs = {'timeout' : 60 })
304268 log .debug ('Enqueued SOLR Core creation for DataStore Resource %s ' %
305269 resource_id )
306- # TODO: await or retry here???
307- conn = self ._make_connection (resource_id )
270+ # we return here as we do not know how long the background
271+ # job to create the new SOLR core will take.
272+ return
308273 if not conn :
309274 raise DatastoreSearchException (
310275 _ ('Could not connect to SOLR core %s' ) % core_name )
@@ -399,20 +364,46 @@ def create(self,
399364 self .reindex (resource_id , connection = conn )
400365
401366 if 'records' in data_dict :
402- self .upsert (context , data_dict , connection = conn )
367+ self .upsert (data_dict , connection = conn )
403368
404369 self ._check_counts (resource_id , connection = conn )
405370
def create_callback(self, data_dict: DataDict) -> Any:
    """
    Callback from the REDIS queue via SOLR server
    after the SOLR core creation job has run.

    Logs the ``exit_code``, ``stdout`` and ``stderr`` values reported in
    ``data_dict`` (each only when present and truthy), derives the
    DataStore Resource ID from ``core_name`` by removing the leading
    ``self.prefix``, looks up the Resource's DataStore fields and calls
    ``self.create`` with them.

    :param data_dict: values reported back by the core creation job; the
        keys read here are ``core_name``, ``exit_code``, ``stdout`` and
        ``stderr``.
    :returns: ``None``
    """
    # NOTE(review): a falsy exit_code (0 or missing) is not logged, and a
    # non-zero one is only logged before processing continues -- confirm
    # that carrying on after a failed creation job is intended.
    for key in ('exit_code', 'stdout', 'stderr'):
        value = data_dict.get(key)
        if value:
            log.debug('SOLR core creation %s: %s', key, value)

    core_name = data_dict.get('core_name') or ''
    if not core_name:
        # Without a core name there is no Resource to look up; querying
        # the DataStore with an empty Resource ID could only fail.
        log.warning('SOLR core creation callback received no core_name')
        return

    # Strip only the leading prefix: str.replace() would also remove any
    # later occurrence of the prefix inside the Resource ID itself.
    prefix = self.prefix
    if prefix and core_name.startswith(prefix):
        resource_id = core_name[len(prefix):]
    else:
        resource_id = core_name

    # limit=0 returns the field definitions without any records;
    # skip_search_engine is passed through to datastore_search as-is.
    ds_result = get_action('datastore_search')(
        self._get_site_context(), {'resource_id': resource_id,
                                   'limit': 0,
                                   'skip_search_engine': True})
    create_dict = {
        'resource_id': resource_id,
        'fields': [f for f in ds_result['fields'] if
                   f['id'] not in self.default_search_fields]}
    self.create(create_dict)
394+
406395 def upsert (self ,
407- context : Context ,
408396 data_dict : DataDict ,
409397 connection : Optional [pysolr .Solr ] = None ) -> Any :
410398 """
411399 Insert records into the SOLR index.
412400 """
413401 resource_id = data_dict .get ('resource_id' )
414402 core_name = f'{ self .prefix } { resource_id } '
415- conn = self ._make_or_create_connection (resource_id ) if not connection else connection
403+ conn = self ._make_connection (resource_id ) if not connection else connection
404+
405+ if not conn :
406+ return
416407
417408 if data_dict ['records' ]:
418409 for r in data_dict ['records' ]:
@@ -430,7 +421,6 @@ def upsert(self,
430421 self ._check_counts (resource_id , connection = conn )
431422
432423 def search (self ,
433- context : Context ,
434424 data_dict : DataDict ,
435425 connection : Optional [pysolr .Solr ] = None ) -> Optional [List [Dict [str , Any ]]]:
436426 """
@@ -440,7 +430,11 @@ def search(self,
440430 return
441431
442432 resource_id = data_dict .get ('resource_id' )
443- conn = self ._make_or_create_connection (resource_id ) if not connection else connection
433+ conn = self ._make_connection (resource_id ) if not connection else connection
434+
435+ if not conn :
436+ raise DatastoreSearchException (
437+ _ ('SOLR core does not exist for DataStore Resource %s' ) % resource_id )
444438
445439 query = data_dict .get ('q' , {})
446440 filters = data_dict .get ('filters' , {})
@@ -485,15 +479,17 @@ def search(self,
485479 return results .docs
486480
487481 def delete (self ,
488- context : Context ,
489482 data_dict : DataDict ,
490483 connection : Optional [pysolr .Solr ] = None ) -> Any :
491484 """
492485 Removes records from the SOLR index, or deletes the core entirely.
493486 """
494487 resource_id = data_dict .get ('resource_id' )
495488 core_name = f'{ self .prefix } { resource_id } '
496- conn = self ._make_or_create_connection (resource_id ) if not connection else connection
489+ conn = self ._make_connection (resource_id ) if not connection else connection
490+
491+ if not conn :
492+ return
497493
498494 if not data_dict .get ('filters' ):
499495 errmsg = _ ('Could not delete SOLR core %s' ) % core_name
0 commit comments