@@ -577,6 +577,9 @@ def _submit_requests( # noqa
577577 ) -> dict :
578578 """Handle submitting requests sequentially with pagination.
579579
580+ If criteria contains comma-separated parameters (except those that are naturally comma-separated),
581+ split them into multiple sequential requests and combine results.
582+
580583 Arguments:
581584 criteria: dictionary of criteria to filter down
582585 url: url used to make request
@@ -588,22 +591,148 @@ def _submit_requests( # noqa
588591 Returns:
589592 Dictionary containing data and metadata
590593 """
591- total_data = {"data" : []} # type: dict
592-
593- # Get first page to determine total number of documents
594- initial_criteria = copy (criteria )
595- data , total_num_docs = self ._submit_request_and_process (
596- url = url ,
597- verify = True ,
598- params = initial_criteria ,
599- use_document_model = use_document_model ,
600- timeout = timeout ,
601- )
594+ # Parameters that naturally support comma-separated values and should NOT be split
595+ no_split_params = {
596+ "elements" ,
597+ "exclude_elements" ,
598+ "possible_species" ,
599+ "coordination_envs" ,
600+ "coordination_envs_anonymous" ,
601+ "has_props" ,
602+ "gb_plane" ,
603+ "rotation_axis" ,
604+ "keywords" ,
605+ "substrate_orientation" ,
606+ "film_orientation" ,
607+ "synthesis_type" ,
608+ "operations" ,
609+ "condition_mixing_device" ,
610+ "condition_mixing_media" ,
611+ "condition_heating_atmosphere" ,
612+ "_fields" ,
613+ "formula" ,
614+ "chemsys" ,
615+ }
616+
617+ # Check if we need to split any comma-separated parameters
618+ split_param = None
619+ split_values = None
620+ total_num_docs = 0 # Initialize before try/else blocks
621+
622+ for key , value in criteria .items ():
623+ if (
624+ isinstance (value , str )
625+ and "," in value
626+ and key not in no_split_params
627+ and not key .startswith ("_" )
628+ ):
629+ split_param = key
630+ split_values = value .split ("," )
631+ break
632+
633+ # If we found a parameter to split, try the request first and only split on error
634+ if split_param and split_values and len (split_values ) > 1 :
635+ try :
636+ # First, try the request with all values as-is
637+ initial_criteria = copy (criteria )
638+ data , total_num_docs = self ._submit_request_and_process (
639+ url = url ,
640+ verify = True ,
641+ params = initial_criteria ,
642+ use_document_model = use_document_model ,
643+ timeout = timeout ,
644+ )
602645
603- total_data ["data" ].extend (data ["data" ])
646+ # Check if we got 0 results - some parameters are silently ignored by the API
647+ # when passed as comma-separated values, so we need to split them anyway
648+ if total_num_docs == 0 and len (split_values ) > 1 :
649+ # Treat this the same as a 422 error - split into batches
650+ raise MPRestError (
651+ "Got 0 results for comma-separated parameter, will try splitting"
652+ )
653+
654+ # If successful, continue with normal pagination
655+ total_data = {"data" : []} # type: dict
656+ total_data ["data" ].extend (data ["data" ])
657+
658+ if "meta" in data :
659+ total_data ["meta" ] = data ["meta" ]
660+
661+ # Continue with pagination if needed (handled below)
662+
663+ except MPRestError as e :
664+ # If we get 422 or 414 error, or 0 results for comma-separated params, split into batches
665+ if "422" in str (e ) or "414" in str (e ) or "Got 0 results" in str (e ):
666+ total_data = {"data" : []} # type: dict
667+ total_num_docs = 0
668+
669+ # Batch the split values to reduce number of requests
670+ # Use batches of up to 100 values to balance URL length and request count
671+ batch_size = min (100 , max (1 , len (split_values ) // 10 ))
672+
673+ # Setup progress bar for split parameter requests
674+ num_batches = ceil (len (split_values ) / batch_size )
675+ pbar_message = f"Retrieving { len (split_values )} { split_param } values in { num_batches } batches"
676+ pbar = (
677+ tqdm (
678+ desc = pbar_message ,
679+ total = num_batches ,
680+ )
681+ if not self .mute_progress_bars
682+ else None
683+ )
684+
685+ for i in range (0 , len (split_values ), batch_size ):
686+ batch = split_values [i : i + batch_size ]
687+ split_criteria = copy (criteria )
688+ split_criteria [split_param ] = "," .join (batch )
689+
690+ # Recursively call _submit_requests with the batch
691+ # This will trigger another split if the batch is still too large
692+ result = self ._submit_requests (
693+ url = url ,
694+ criteria = split_criteria ,
695+ use_document_model = use_document_model ,
696+ chunk_size = chunk_size ,
697+ num_chunks = num_chunks ,
698+ timeout = timeout ,
699+ )
700+
701+ total_data ["data" ].extend (result ["data" ])
702+ if "meta" in result :
703+ total_data ["meta" ] = result ["meta" ]
704+ total_num_docs += result ["meta" ].get ("total_doc" , 0 )
705+
706+ if pbar is not None :
707+ pbar .update (1 )
708+
709+ if pbar is not None :
710+ pbar .close ()
711+
712+ # Update total_doc if we have meta
713+ if "meta" in total_data :
714+ total_data ["meta" ]["total_doc" ] = total_num_docs
715+
716+ return total_data
717+ else :
718+ # Re-raise other errors
719+ raise
720+ else :
721+ # No splitting needed - get first page
722+ total_data = {"data" : []} # type: dict
723+ initial_criteria = copy (criteria )
724+ data , total_num_docs = self ._submit_request_and_process (
725+ url = url ,
726+ verify = True ,
727+ params = initial_criteria ,
728+ use_document_model = use_document_model ,
729+ timeout = timeout ,
730+ )
731+
732+ total_data ["data" ].extend (data ["data" ])
604733
605- if "meta" in data :
606- total_data ["meta" ] = data ["meta" ]
734+ if "meta" in data :
735+ total_data ["meta" ] = data ["meta" ]
607736
608737 # Get max number of response pages
609738 max_pages = (