@@ -974,6 +974,7 @@ static void filters_set_format_int(filter_t *flt, bcf1_t *line, token_t *tok)
974974 if ( !(flt -> cached_GT .mask [i ] & (1 <<k )) ) continue ;
975975 dst [j ++ ] = src [k ];
976976 }
977+ if ( !j ) { bcf_double_set_missing (dst [j ]); j ++ ; }
977978 for (; j < tok -> nval1 ; j ++ ) bcf_double_set_vector_end (dst [j ]);
978979 }
979980 }
@@ -1064,6 +1065,7 @@ static void filters_set_format_float(filter_t *flt, bcf1_t *line, token_t *tok)
10641065 dst [j ] = src [k ];
10651066 j ++ ;
10661067 }
1068+ if ( !j ) { bcf_double_set_missing (dst [j ]); j ++ ; }
10671069 for (; j < tok -> nval1 ; j ++ ) bcf_double_set_vector_end (dst [j ]);
10681070 }
10691071 }
@@ -1113,30 +1115,39 @@ static void filters_set_format_string(filter_t *flt, bcf1_t *line, token_t *tok)
11131115
11141116 if ( nstr < 0 ) return ;
11151117
1118+ if ( tok -> idx == -3 && filters_cache_genotypes (flt ,line )!= 0 ) return ;
1119+
11161120 tok -> nvalues = tok -> str_value .l = nstr ;
11171121 tok -> nval1 = nstr / tok -> nsamples ;
11181122 for (i = 0 ; i < tok -> nsamples ; i ++ )
11191123 {
11201124 if ( !tok -> usmpl [i ] ) continue ;
11211125 char * src = tok -> str_value .s + i * tok -> nval1 , * dst = src ;
1122- int ibeg = 0 , idx = 0 ;
1126+ int ibeg = 0 ;
1127+ int idx = 0 ; // idx-th field
11231128 while ( ibeg < tok -> nval1 )
11241129 {
11251130 int iend = ibeg ;
11261131 while ( iend < tok -> nval1 && src [iend ] && src [iend ]!= ',' ) iend ++ ;
11271132
11281133 int keep = 0 ;
1129- if ( tok -> idx >= 0 )
1134+ if ( tok -> idx >= 0 ) // single index, given explicitly, e.g. AD[:1]
11301135 {
11311136 if ( tok -> idx == idx ) keep = 1 ;
11321137 }
1133- else if ( idx < tok -> nidxs )
1138+ else if ( tok -> idx == -3 ) // given by GT index, e.g. AD[:GT]
11341139 {
1135- if ( tok -> idxs [idx ] != 0 ) keep = 1 ;
1140+ if ( flt -> cached_GT .mask [i ] & (1 <<idx ) ) keep = 1 ;
1141+ }
1142+ else // given as a list, e.g. AD[:0,3]
1143+ {
1144+ if ( idx < tok -> nidxs )
1145+ {
1146+ if ( tok -> idxs [idx ] != 0 ) keep = 1 ;
1147+ }
1148+ else if ( tok -> idxs [tok -> nidxs - 1 ] < 0 )
1149+ keep = 1 ;
11361150 }
1137- else if ( tok -> idxs [tok -> nidxs - 1 ] < 0 )
1138- keep = 1 ;
1139-
11401151 if ( keep )
11411152 {
11421153 if ( ibeg != 0 ) memmove (dst , src + ibeg , iend - ibeg + 1 );
@@ -2407,6 +2418,18 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
24072418 return 2 ;
24082419}
24092420
2421+ // A note about comparisons:
2422+ // When setting value by determining index from the genotype, we face the problem
2423+ // of how to interpret truncating arrays. Say we have TAG defined as Number=. and
2424+ // GT:TAG 1/1:0,1,2 0/0:0
2425+ // Then when querying we expect the following expression to evaluate for the second
2426+ // sample as
2427+ // -i 'TAG[1:1]="."' .. true
2428+ // -i 'TAG[1:GT]="."' .. false
2429+ // The problem is that the implementation truncates the number of fields, filling
2430+ // usually fewer than the original number of per-sample values. This is fixed by
2431+ // adding an exception that makes the code aware of this: the GT indexing can be
2432+ // recognised by haveing tok->idx==-3
24102433#define CMP_VECTORS (atok ,btok ,_rtok ,CMP_OP ,missing_logic ) \
24112434{ \
24122435 token_t *rtok = _rtok; \
@@ -2494,6 +2517,8 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
24942517 double * bptr = btok -> values + i * btok -> nval1 ; \
24952518 for (j = 0 ; j < atok -> nval1 ; j ++ ) \
24962519 { \
2520+ if ( atok -> idx == -3 && bcf_double_is_vector_end (aptr [j ]) ) break ; /* explained above */ \
2521+ if ( btok -> idx == -3 && bcf_double_is_vector_end (bptr [j ]) ) break ; /* explained above */ \
24972522 int nmiss = bcf_double_is_missing_or_vector_end (aptr [j ]) ? 1 : 0 ; \
24982523 if ( nmiss && !missing_logic [0 ] ) continue ; /* any is missing => result is false */ \
24992524 nmiss += (bcf_double_is_missing_or_vector_end (bptr [j ]) ? 1 : 0 ); \
@@ -2515,9 +2540,10 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
25152540 { \
25162541 if ( !rtok -> usmpl [i ] ) continue ; \
25172542 double * aptr = atok -> values + i * atok -> nval1 ; \
2518- double * bptr = btok -> values + i * btok -> nval1 ; \
2543+ double * bptr = btok -> values ; \
25192544 for (j = 0 ; j < atok -> nval1 ; j ++ ) \
25202545 { \
2546+ if ( atok -> idx == -3 && bcf_double_is_vector_end (aptr [j ]) ) break ; /* explained above */ \
25212547 int miss = bcf_double_is_missing_or_vector_end (aptr [j ]) ? 1 : 0 ; \
25222548 if ( miss && !missing_logic [0 ] ) continue ; /* any is missing => result is false */ \
25232549 for (k = 0 ; k < btok -> nvalues ; k ++ ) \
@@ -2541,10 +2567,11 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
25412567 for (i = 0 ; i < btok -> nsamples ; i ++ ) \
25422568 { \
25432569 if ( !rtok -> usmpl [i ] ) continue ; \
2544- double * aptr = atok -> values + i * atok -> nval1 ; \
2570+ double * aptr = atok -> values ; \
25452571 double * bptr = btok -> values + i * btok -> nval1 ; \
25462572 for (j = 0 ; j < btok -> nval1 ; j ++ ) \
25472573 { \
2574+ if ( atok -> idx == -3 && bcf_double_is_vector_end (bptr [j ]) ) break ; /* explained above */ \
25482575 int miss = bcf_double_is_missing_or_vector_end (bptr [j ]) ? 1 : 0 ; \
25492576 if ( miss && !missing_logic [0 ] ) continue ; /* any is missing => result is false */ \
25502577 for (k = 0 ; k < atok -> nvalues ; k ++ ) \
0 commit comments