@@ -542,3 +542,305 @@ def build_bq_where_clause(filters, join_with_space=False, comb_with='AND', field
542542 filter_set .append ('{}{}{}' .format ("(" if encapsulate else "" , filter_string , ")" if encapsulate else "" ))
543543
544544 return " {} " .format (comb_with ).join (filter_set )
545+
546+
547+ # Builds a BQ API v1 query parameter set and WHERE clause string from a set of filters of the form:
548+ # {
549+ # 'field_name': [<value>,...]
550+ # }
551+ # Breaks out '<ATTR> IS NULL'
552+ # 2+ values are converted to IN (<value>,...)
553+ # Filters must already be pre-bucketed or formatted
554+ # Use of LIKE is detected based on single-length value array and use of % in the value string
555+ # Support special 'mutation' filter category
556+ # Support for Greater/Less than (or equal to) via [gl]t[e]{0,1} in attr name,
557+ # eg. {"age_at_diagnosis_gte": [50,]}
558+ # Support for BETWEEN via _btw in attr name, eg. {"wbc_at_diagnosis_btw": [800,1200]}
559+ # Support for providing an explicit schema of the fields being searched
560+ # Support for specifying a set of continuous numeric attributes to be presumed for BETWEEN clauses
561+ #
562+ # TODO: add support for DATETIME eg 6/10/2010
def build_bq_filter_and_params_v1(filters, comb_with='AND', param_suffix=None, with_count_toggle=False,
                                  field_prefix=None, type_schema=None, case_insens=True, continuous_numerics=None):
    """Build a BigQuery API v1 query-parameter list and WHERE-clause string from a filter dict.

    Args:
        filters: mapping of attribute name -> value, list of values, or a dict of the form
            {'op': <'AND'|'OR'>, 'values': [...]}. Keys containing 'MUT:' are handled as mutation
            filters and are expected to look like 'MUT:<build>:<gene>:<NOT or other>:<filter type>'.
            A trailing _gt/_gte/_lt/_lte/_eq on a key selects the comparison operator and a trailing
            _btw/_ebtw/_btwe/_ebtwe selects a range comparison; the suffix is stripped to obtain the
            column name. The string 'None' in a value list adds an IS NULL test.
        comb_with: SQL operator used to join the per-attribute clause groups.
        param_suffix: optional string appended (after '_') to every generated parameter name.
        with_count_toggle: when true, each standard clause is OR'd with a '<param>_filtering' toggle
            parameter, and 'count_params' / 'attr_params' are populated in the result.
        field_prefix: optional table alias; a trailing '.' is added if missing.
        type_schema: optional mapping of attribute (as given in ``filters``) -> type name. 'STRING'
            stays 'STRING'; any other truthy entry is treated as 'NUMERIC'.
        case_insens: accepted for interface compatibility; this implementation does not consult it
            (STRING comparisons are always wrapped in LOWER()).
        continuous_numerics: optional list of attribute names to treat as ranges even without a
            _btw-style suffix.

    Returns:
        dict with 'filter_string', 'parameters' (list of v1 parameter dicts), 'attr_params', and,
        when ``with_count_toggle`` is set, 'count_params'.

    Filters that cannot be expressed (malformed ranges, multi-value AND filters, empty value lists)
    are logged where applicable and skipped entirely: no clause and no parameters are emitted.
    """
    prefix = field_prefix or ''
    if prefix and not prefix.endswith('.'):
        prefix += '.'
    suffix_tag = '' if param_suffix is None else '_{}'.format(param_suffix)
    continuous_numerics = continuous_numerics or []

    result = {
        'filter_string': '',
        'parameters': [],
        'attr_params': {}
    }
    if with_count_toggle:
        result['count_params'] = {}

    filter_set = []
    attr_filters = {}

    # Mutation filters are split out because they repeatedly use the same columns
    mutation_filters = {a: v for a, v in filters.items() if 'MUT:' in a}
    other_filters = {a: v for a, v in filters.items() if 'MUT:' not in a}

    # 'Mutation' filters, special category for MUT: type filters
    for mut_count, (attr, values) in enumerate(mutation_filters.items(), start=1):
        if not isinstance(values, list):
            values = [values]
        attr_parts = attr.split(':')
        gene = attr_parts[2]
        filter_type = attr_parts[-1].lower()
        invert = attr_parts[3] == 'NOT'

        gene_param_name = 'gene{}{}'.format(mut_count, suffix_tag)
        clause = '{}Hugo_Symbol = @{} AND '.format(prefix, gene_param_name)
        gene_query_param = {
            'name': gene_param_name,
            'parameterType': {'type': 'STRING'},
            'parameterValue': {'value': gene}
        }

        var_query_param = None
        if filter_type == 'category' and values[0].lower() == 'any':
            clause += '{}Variant_Classification IS NOT NULL'.format(prefix)
        else:
            if filter_type == 'category':
                values = MOLECULAR_CATEGORIES[values[0]]['attrs']
            var_param_name = 'var_class{}{}'.format(mut_count, suffix_tag)
            clause += '{}Variant_Classification {}IN UNNEST(@{})'.format(
                prefix, 'NOT ' if invert else '', var_param_name)
            var_query_param = {
                'name': var_param_name,
                'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'STRING'}},
                'parameterValue': {'arrayValues': [{'value': x} for x in values]}
            }

        filter_set.append('({})'.format(clause))
        result['parameters'].append(gene_query_param)
        if var_query_param is not None:
            result['parameters'].append(var_query_param)

    # Suffix detection is anchored to the end of the name so that attributes which merely contain
    # '_l', '_g', '_eq', etc. (e.g. days_to_last_followup) are not mistaken for operator-suffixed ones
    suffix_re = re.compile(r'_(gte?|lte?|e?btwe?|eq)$')
    btw_re = re.compile(r'_e?btwe?$')
    non_numeric_re = re.compile(r'[^0-9\.,]', re.UNICODE)
    comparators = {'gt': '>', 'gte': '>=', 'lt': '<', 'lte': '<=', 'eq': '='}
    # (lower bound operator, upper bound operator); anything else falls back to inclusive BETWEEN
    range_bounds = {'btw': ('>', '<'), 'ebtw': ('>=', '<'), 'btwe': ('>', '<=')}

    # Standard query filters
    for attr, values in other_filters.items():
        suffix_match = suffix_re.search(attr)
        suffix = suffix_match.group(1) if suffix_match else ''
        attr_name = attr[:suffix_match.start()] if suffix_match else attr
        is_btw = btw_re.search(attr.lower()) is not None
        column = prefix + attr_name

        value_op = 'OR'
        if isinstance(values, dict) and 'values' in values:
            # This is a fully qualified attribute which needs to have its definition broken out
            value_op = values.get('op', 'OR')
            values = values['values']

        # We require our attributes to be value lists; *only* ranged numerics may be a list of lists
        if not isinstance(values, list):
            values = [values]
        elif values and isinstance(values[0], list) and not is_btw and attr not in continuous_numerics:
            values = [item for sub in values for item in sub]

        if not values:
            # Nothing to filter on for this attribute
            continue

        if type_schema and type_schema.get(attr, None):
            parameter_type = 'NUMERIC' if type_schema[attr] != 'STRING' else 'STRING'
        elif FIXED_TYPES.get(attr, None):
            parameter_type = FIXED_TYPES.get(attr)
        else:
            # The first value (or the first member of the first array) is assumed to be indicative
            # of all other values, since multi-typed fields aren't supported
            first = values[0]
            sample = first[0] if isinstance(first, list) and first else first
            if type(sample) in (int, float, complex):
                parameter_type = 'NUMERIC'
            else:
                parameter_type = 'STRING' if non_numeric_re.search(str(sample)) else 'NUMERIC'

        param_name = attr + suffix_tag

        null_clause = ''
        if 'None' in values:
            # Build a new list rather than mutating the caller's
            values = [v for v in values if v != 'None']
            null_clause = "{} IS NULL".format(column)

        value_clause = ''
        query_params = []
        if values:
            if len(values) == 1 and not is_btw and not isinstance(values[0], list):
                # Single scalar param
                if parameter_type == 'STRING':
                    value_clause = "LOWER({}) = LOWER(@{})".format(column, param_name)
                else:
                    value_clause = "{} {} @{}".format(column, comparators.get(suffix, '='), param_name)
                query_params.append({
                    'name': param_name,
                    'parameterType': {'type': parameter_type},
                    'parameterValue': {'value': values[0]}
                })
            # Occasionally attributes may come in without the appropriate _e?btwe? suffix; we account
            # for that here by checking for the proper attr_name in the optional continuous_numerics list
            elif is_btw or attr_name in continuous_numerics:
                # A single flat pair is converted to a list containing one 2-member list
                if len(values) == 2 and not isinstance(values[0], list):
                    values = [values]
                if not all(isinstance(pair, (list, tuple)) and len(pair) == 2 for pair in values):
                    logger.error(
                        "[ERROR] While parsing attribute {}, calculated to be a numeric range filter, found an unparseable value:".format(attr))
                    logger.error("[ERROR] {}".format(values))
                    continue
                bounds = range_bounds.get(suffix)
                range_clauses = []
                for pair_index, (low, high) in enumerate(values):
                    low_name = '{}_btw_{}'.format(param_name, 2 * pair_index + 1)
                    high_name = '{}_btw_{}'.format(param_name, 2 * pair_index + 2)
                    if bounds:
                        range_clauses.append("{col} {lo} @{p1} AND {col} {hi} @{p2}".format(
                            col=column, lo=bounds[0], p1=low_name, hi=bounds[1], p2=high_name))
                    else:
                        range_clauses.append("{} BETWEEN @{} AND @{}".format(column, low_name, high_name))
                    for bound_name, bound_value in ((low_name, low), (high_name, high)):
                        query_params.append({
                            'name': bound_name,
                            'parameterType': {'type': parameter_type},
                            'parameterValue': {'value': bound_value}
                        })
                value_clause = " OR ".join(range_clauses)
            elif value_op == 'AND' and len(values) > 1:
                # If an operator is to be AND'd with more than one value we must make an intersection
                # statement on the higher-level entity (i.e. select for studies which have series
                # containing both values). That cannot be performed here, as this is only a clause builder
                logger.warning("[WARNING] Multiple-value AND clauses require an intersection statement!")
                continue
            else:
                # Simple array param
                is_string = parameter_type == 'STRING'
                query_params.append({
                    'name': param_name,
                    'parameterType': {'type': 'ARRAY', 'arrayType': {'type': parameter_type}},
                    'parameterValue': {
                        'arrayValues': [{'value': str(x).lower() if is_string else x} for x in values]
                    }
                })
                value_clause = "{} IN UNNEST(@{})".format(
                    "LOWER({})".format(column) if is_string else column, param_name)

        filter_string = " OR ".join(c for c in (null_clause, value_clause) if c)

        if with_count_toggle:
            filter_string = "({}) OR @{}_filtering = 'not_filtering'".format(filter_string, param_name)
            count_param = {
                'name': param_name + '_filtering',
                'parameterType': {'type': 'STRING'},
                'parameterValue': {'value': 'filtering'}
            }
            result['count_params'][param_name] = count_param
            result['attr_params'].setdefault(attr, []).append(param_name)
            result['parameters'].append(count_param)

        # The first filter seen for a given column decides how that column's clauses are joined
        group = attr_filters.setdefault(attr_name, {'OP': value_op, 'filters': []})
        group['filters'].append(filter_string)
        result['parameters'].extend(query_params)

    # Extend (rather than replace) so the mutation clauses built above are kept
    filter_set.extend(
        "(({}))".format(") {} (".format(group['OP']).join(group['filters']))
        for group in attr_filters.values()
    )
    result['filter_string'] = " {} ".format(comb_with).join(filter_set)

    return result
0 commit comments