adsabs
diff --git a/adsmp/app.py b/adsmp/app.py
Lines changed: 6 additions & 4 deletions
diff --git a/adsmp/tasks.py b/adsmp/tasks.py
Lines changed: 16 additions & 11 deletions
@@ -763,7 +763,7 @@ def should_include_in_sitemap(self, record):
         3. If processed, processing isn't too stale
         
         Args:
-            record: Dictionary with record data including bib_data, status, timestamps
+            record: Dictionary with record data including has_bib_data, status, timestamps
             
         Returns:
             bool: True if record should be included in sitemap, False otherwise
@@ -772,14 +772,14 @@ def should_include_in_sitemap(self, record):
 
         # Extract values from record dictionary
         bibcode = record.get('bibcode', None)
-        bib_data = record.get('bib_data', None)
+        has_bib_data = record.get('has_bib_data', None)
         bib_data_updated = record.get('bib_data_updated')
         solr_processed = record.get('solr_processed') 
         status = record.get('status')
 
         # Must have bibliographic data
-        if not bib_data or not bibcode or (isinstance(bib_data, str) and not bib_data.strip()):
-            self.logger.debug('Excluding %s from sitemap: No bibcode or bib_data', bibcode)
+        if not has_bib_data or not bibcode:
+            self.logger.debug('Excluding %s from sitemap: No bibcode or has_bib_data is False', bibcode)
             return False
 
          # Exclude if SOLR failed or if record is being retried (previously failed)
@@ -828,6 +828,8 @@ def get_records_bulk(self, bibcodes, session, load_only=None):
             record_data = {}
             for field in (load_only or ['id', 'bibcode', 'bib_data', 'bib_data_updated', 'solr_processed', 'status']):
                 record_data[field] = getattr(record, field, None)
+            # Add has_bib_data boolean for sitemap checks
+            record_data['has_bib_data'] = bool(record_data.get('bib_data'))
             records_dict[record.bibcode] = record_data
 
         return records_dict
 
@@ -157,13 +157,15 @@ def task_update_record(msg):
                 record = app.update_storage(m.bibcode, 'nonbib_data', m.toJSON())
                 if record:
                     logger.debug('Saved record from list: %s', record)
+                    _generate_boost_request(m, type)
         elif type == 'metrics_records':
             for m in msg.metrics_records:
                 m = Msg(m, None, None)
                 bibcodes.append(m.bibcode)
                 record = app.update_storage(m.bibcode, 'metrics', m.toJSON(including_default_value_fields=True))
                 if record:
                     logger.debug('Saved record from list: %s', record)
+                    _generate_boost_request(m, type)
         elif type == 'augment':
             bibcodes.append(msg.bibcode)
             record = app.update_storage(msg.bibcode, 'augment',
@@ -176,22 +178,25 @@ def task_update_record(msg):
             record = app.update_storage(msg.bibcode, type, msg.toJSON())
             if record:
                 logger.debug('Saved record: %s', record)
+                _generate_boost_request(msg, type)
             if type == 'metadata':
                 # with new bib data we request to augment the affiliation
                 # that pipeline will eventually respond with a msg to task_update_record
                 logger.debug('requesting affilation augmentation for %s', msg.bibcode)
                 app.request_aff_augment(msg.bibcode)
-        if record:                        
-            # Send payload to Boost pipeline
-            if type != 'boost' and not app._config.get('TESTING_MODE', False):
-                try:
-                    task_boost_request.apply_async(args=(msg.bibcode,))
-                except Exception as e:
-                    app.logger.exception('Error generating boost request message for bibcode %s: %s', msg.bibcode, e)
-
     else:
         logger.error('Received a message with unclear status: %s', msg)
 
+def _generate_boost_request(msg, msg_type):
+    """Queue task_boost_request for msg.bibcode unless msg_type is in IGNORED_BOOST_PAYLOAD_TYPES (default ['boost']) or TESTING_MODE is set."""
+    if msg_type not in app._config.get('IGNORED_BOOST_PAYLOAD_TYPES', ['boost']) and not app._config.get('TESTING_MODE', False):
+        try:
+            task_boost_request.apply_async(args=(msg.bibcode,))
+        except Exception as e:  # logged and not re-raised, so a failed enqueue does not propagate to the caller
+            app.logger.exception('Error generating boost request message for bibcode %s: %s', msg.bibcode, e)
+    else:  # reached for an ignored msg_type and also whenever TESTING_MODE is truthy
+        app.logger.debug("Message for bibcode %s has type: %s, Skipping.", msg.bibcode, msg_type)  # lazy %-args; str.format() left the %s placeholders unfilled
+
 @app.task(queue='update-scixid')
 def task_update_scixid(bibcodes, flag):
     """Receives bibcodes to add scix id to the record.
@@ -490,7 +495,7 @@ def task_cleanup_invalid_sitemaps():
                 session.query(
                     SitemapInfo.id,  
                     SitemapInfo.bibcode,
-                    Records.bib_data,
+                    (Records.bib_data.isnot(None)).label('has_bib_data'),
                     Records.bib_data_updated,
                     Records.solr_processed,
                     Records.status
@@ -519,7 +524,7 @@ def task_cleanup_invalid_sitemaps():
                 # Convert to dict for should_include_in_sitemap function
                 record_dict = {
                     'bibcode': record_data.bibcode,
-                    'bib_data': record_data.bib_data,
+                    'has_bib_data': record_data.has_bib_data,
                     'bib_data_updated': record_data.bib_data_updated,
                     'solr_processed': record_data.solr_processed,
                     'status': record_data.status
@@ -688,7 +693,7 @@ def task_manage_sitemap(bibcodes, action):
                         # Apply SOLR filtering - convert record to dict for should_include_in_sitemap
                         record_dict = {
                             'bibcode': record.bibcode,
-                            'bib_data': record.bib_data,
+                            'has_bib_data': bool(record.bib_data),
                             'bib_data_updated': record.bib_data_updated,
                             'solr_processed': record.solr_processed,
                             'status': record.status