Skip to content

Commit a99e5db

Browse files
committed
cleanups of expression and condition parsing
we need quotes around bare words in more places. Any explicit cast is NOT passed down when parsing the next thing. Instead, the next thing is parsed as-is, and then the cast is applied by the current function. This cleans up a lot of odd cases. Also add more checks for different tmpl types when casting things Add '#if 0' out code to complain on unresolved data when parsing. Changing that will require a bunch of other updates, to add quotes around bare words. the tmpl_resolve() function would treat unresolved data as either enums or strings. That will be changing to require either '::' prefix on enums, OR quotes around non-attribute bare words. So (ippadr)* is now invalid, as "*" can't be parsed by tmpl_afrom_substr(). Instead, we must use (ipaddr)'*'
1 parent 560c706 commit a99e5db

File tree

7 files changed

+230
-176
lines changed

7 files changed

+230
-176
lines changed

src/lib/unlang/xlat_expr.c

Lines changed: 124 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -466,11 +466,33 @@ static fr_slen_t xlat_expr_print_regex(fr_sbuff_t *out, xlat_exp_t const *node,
466466
* A space is printed after the first argument only if
467467
* there's a second one. So add one if we "ate" the second argument.
468468
*/
469-
FR_SBUFF_IN_CHAR_RETURN(out, ' ');
469+
if (inst->xlat) FR_SBUFF_IN_CHAR_RETURN(out, ' ');
470470

471471
FR_SBUFF_IN_STRCPY_RETURN(out, fr_tokens[node->call.func->token]);
472472
FR_SBUFF_IN_CHAR_RETURN(out, ' ');
473473

474+
/*
475+
* Regexes which aren't instantiated: only for unit tests.
476+
*/
477+
if (!inst->xlat) {
478+
child = xlat_exp_next(node->call.args, child);
479+
fr_assert(!xlat_exp_next(node->call.args, child));
480+
fr_assert(child->type == XLAT_GROUP);
481+
482+
FR_SBUFF_IN_CHAR_RETURN(out, '/');
483+
FR_SBUFF_IN_STRCPY_RETURN(out, child->fmt);
484+
FR_SBUFF_IN_CHAR_RETURN(out, '/');
485+
486+
child = xlat_exp_head(child->group);
487+
fr_assert(child->type == XLAT_TMPL);
488+
489+
/*
490+
* The RHS may be a group
491+
*/
492+
FR_SBUFF_RETURN(regex_flags_print, out, tmpl_regex_flags(child->vpt));
493+
goto done;
494+
}
495+
474496
fr_assert(tmpl_contains_regex(inst->xlat->vpt));
475497

476498
if (inst->xlat->quote == T_SINGLE_QUOTED_STRING) FR_SBUFF_IN_CHAR_RETURN(out, 'm');
@@ -480,6 +502,7 @@ static fr_slen_t xlat_expr_print_regex(fr_sbuff_t *out, xlat_exp_t const *node,
480502

481503
FR_SBUFF_RETURN(regex_flags_print, out, inst->regex_flags);
482504

505+
done:
483506
FR_SBUFF_IN_CHAR_RETURN(out, ')');
484507

485508
return fr_sbuff_used_total(out) - at_in;
@@ -2410,12 +2433,12 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
24102433
xlat_exp_t *node = NULL;
24112434
fr_sbuff_t our_in = FR_SBUFF(in);
24122435
fr_sbuff_marker_t opand_m;
2413-
tmpl_rules_t our_t_rules = *t_rules;
2436+
tmpl_rules_t our_t_rules;
24142437
tmpl_t *vpt = NULL;
24152438
fr_token_t quote;
24162439
int triple = 1;
2417-
fr_type_t cast_type = FR_TYPE_NULL;
2418-
xlat_exp_t *cast = NULL;
2440+
fr_type_t cast_type;
2441+
fr_dict_attr_t const *enumv;
24192442

24202443
XLAT_DEBUG("FIELD <-- %pV", fr_box_strvalue_len(fr_sbuff_current(in), fr_sbuff_remaining(in)));
24212444

@@ -2424,6 +2447,21 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
24242447
*/
24252448
if (expr_cast_from_substr(&cast_type, &our_in) < 0) return -1;
24262449

2450+
/*
2451+
* Do NOT pass the cast down to the next set of parsing routines. Instead, let the next data be
2452+
* parsed as whatever, and then add a cast, or cast in place as necessary.
2453+
*/
2454+
our_t_rules = *t_rules;
2455+
if (cast_type == FR_TYPE_NULL) {
2456+
cast_type = our_t_rules.cast;
2457+
enumv = our_t_rules.enumv;
2458+
} else {
2459+
enumv = NULL;
2460+
}
2461+
2462+
our_t_rules.cast = FR_TYPE_NULL;
2463+
our_t_rules.enumv = NULL;
2464+
24272465
/*
24282466
* If we still have '(', then recurse for other expressions
24292467
*
@@ -2460,21 +2498,6 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
24602498
goto done;
24612499
}
24622500

2463-
/*
2464-
* If there is a cast, try to pass it recursively to the parser. This allows us to set default
2465-
* data types, etc.
2466-
*
2467-
* We may end up removing the cast later, if for example the tmpl is an attribute whose data type
2468-
* matches the cast.
2469-
*
2470-
* This ifdef isn't defined anywhere, and is just a reminder to check this code when we move to
2471-
* tmpl_require_enum_prefix=yes. This cast has to be deleted.
2472-
*/
2473-
if (!tmpl_require_enum_prefix && (cast_type != FR_TYPE_NULL)) {
2474-
our_t_rules.cast = cast_type;
2475-
our_t_rules.enumv = NULL;
2476-
}
2477-
24782501
/*
24792502
* Record where the operand begins for better error offsets later
24802503
*/
@@ -2511,11 +2534,6 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
25112534
FALL_THROUGH;
25122535

25132536
case T_BACK_QUOTED_STRING:
2514-
/*
2515-
* Don't put the cast in the tmpl, but put it instead in the expression.
2516-
*/
2517-
if (cast_type != FR_TYPE_NULL) our_t_rules.cast = FR_TYPE_NULL;
2518-
25192537
p_rules = value_parse_rules_quoted[quote];
25202538

25212539
/*
@@ -2608,16 +2626,25 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
26082626

26092627
fr_sbuff_skip_whitespace(&our_in);
26102628

2629+
#if 0
26112630
/*
2612-
* The tmpl has a cast, and it's the same as the explicit cast we were given, we can discard the
2613-
* explicit cast.
2614-
*
2615-
* Also do this for tmpls which are attributes, and which have the same data type as the cast.
2616-
*
2617-
* This work isn't _strictly_ necessary, but it avoids duplicate casts at run-time.
2631+
* A bare word which is NOT a known attribute. That's an error.
2632+
*/
2633+
if (tmpl_is_data_unresolved(vpt)) {
2634+
fr_assert(quote == T_BARE_WORD);
2635+
fr_strerror_const("Failed parsing input");
2636+
fr_sbuff_set(&our_in, &opand_m);
2637+
goto error;
2638+
}
2639+
#endif
2640+
2641+
/*
2642+
* The tmpl has a cast, and it's the same as the explicit cast we were given, we can sometimes
2643+
* discard the explicit cast.
26182644
*/
26192645
if (cast_type != FR_TYPE_NULL) {
26202646
if (tmpl_rules_cast(vpt) == cast_type) {
2647+
fr_assert(0);
26212648
cast_type = FR_TYPE_NULL;
26222649

26232650
} else if (tmpl_is_attr(vpt)) {
@@ -2628,13 +2655,19 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
26282655
da = tmpl_attr_tail_da(vpt); /* could be a list! */
26292656

26302657
/*
2631-
* Omit the cast if the da type matches our cast. BUT don't do this for enums! In that
2632-
* case, the cast will convert the value-box to one _without_ an enumv entry, which means
2633-
* that the value will get printed as its underlying data type, and not as the enum name.
2658+
* Set the cast for attributes. Note that tmpl_cast_set() will take care of
2659+
* suppressing redundant casts. But it still allows (uint32)&Service-Type,
2660+
* which means "return the raw value", and not "return enum name".
26342661
*/
2635-
if (da && (da->type == cast_type)) {
2636-
tmpl_cast_set(vpt, cast_type);
2637-
cast_type = FR_TYPE_NULL;
2662+
if (da) {
2663+
if (tmpl_cast_set(vpt, cast_type) < 0) {
2664+
fr_sbuff_set(&our_in, &opand_m);
2665+
goto error;
2666+
} else {
2667+
cast_type = FR_TYPE_NULL;
2668+
}
2669+
} else { /* it's something like &reply. */
2670+
fr_assert(0);
26382671
}
26392672

26402673
} else if (tmpl_is_data(vpt)) {
@@ -2650,43 +2683,54 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
26502683
if (tmpl_value_type(vpt) == cast_type) {
26512684
cast_type = FR_TYPE_NULL;
26522685

2686+
} else if (tmpl_cast_in_place(vpt, cast_type, enumv) < 0) {
2687+
fr_sbuff_set(&our_in, &opand_m);
2688+
goto error;
2689+
26532690
} else {
26542691
/*
2655-
* Cast it now, and remove the cast type.
2692+
* We've parsed the data as the new data type, so we don't need any more
2693+
* casting.
26562694
*/
2657-
if (tmpl_cast_in_place(vpt, cast_type, NULL) < 0) {
2658-
fr_sbuff_set(&our_in, &opand_m);
2659-
goto error;
2660-
}
2661-
26622695
cast_type = FR_TYPE_NULL;
26632696
}
2664-
}
26652697

2666-
/*
2667-
* Cast a string to a string means "no cast".
2668-
*/
2669-
if ((cast_type == FR_TYPE_STRING) && (vpt->quote != T_BARE_WORD)) {
2670-
tmpl_cast_set(vpt, FR_TYPE_NULL);
2671-
cast_type = FR_TYPE_NULL;
2672-
}
2698+
} else if (tmpl_contains_xlat(vpt)) {
2699+
/*
2700+
* (string) "foo %{...}" is redundant. Drop the cast.
2701+
*/
2702+
if ((cast_type == FR_TYPE_STRING) && (vpt->quote != T_BARE_WORD)) {
2703+
tmpl_cast_set(vpt, FR_TYPE_NULL);
2704+
cast_type = FR_TYPE_NULL;
26732705

2674-
/*
2675-
* Push the cast down.
2676-
*
2677-
* But if we're casting to string, and the RHS is already a string, we don't need to cast
2678-
* it. We can just discard the cast.
2679-
*/
2680-
if ((cast_type != FR_TYPE_NULL) && (tmpl_rules_cast(vpt) == FR_TYPE_NULL)) {
2681-
if ((cast_type != FR_TYPE_STRING) || (vpt->quote == T_BARE_WORD)) {
2706+
} else {
2707+
/*
2708+
* Push the cast to the tmpl.
2709+
*/
26822710
tmpl_cast_set(vpt, cast_type);
2711+
cast_type = FR_TYPE_NULL;
26832712
}
2684-
cast_type = FR_TYPE_NULL;
2713+
2714+
} else if (tmpl_is_data_unresolved(vpt)) {
2715+
/*
2716+
* A bare word which is NOT a known attribute. That's an error.
2717+
*/
2718+
fr_assert(quote == T_BARE_WORD);
2719+
fr_strerror_const("Failed parsing input");
2720+
fr_sbuff_set(&our_in, &opand_m);
2721+
goto error;
2722+
2723+
} else {
2724+
/*
2725+
* Regex? Or something else weird?
2726+
*/
2727+
tmpl_debug(vpt);
2728+
fr_assert(0);
26852729
}
26862730
}
26872731

26882732
/*
2689-
* Else we're not hoisting, set the node to the VPT
2733+
* Assign the tmpl to the node.
26902734
*/
26912735
node->vpt = vpt;
26922736
node->quote = quote;
@@ -2731,6 +2775,8 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf
27312775
* If there is a cast, then reparent the node with a cast wrapper.
27322776
*/
27332777
if (cast_type != FR_TYPE_NULL) {
2778+
xlat_exp_t *cast;
2779+
27342780
MEM(cast = expr_cast_alloc(head, cast_type));
27352781
xlat_func_append_arg(cast, node, false);
27362782
node = cast;
@@ -2943,30 +2989,30 @@ static fr_slen_t tokenize_expression(xlat_exp_head_t *head, xlat_exp_t **out, fr
29432989
fr_sbuff_skip_whitespace(&our_in);
29442990
fr_sbuff_marker(&m_rhs, &our_in);
29452991

2946-
#if 0
29472992
/*
2948-
* If LHS is attr && structural, allow only == and !=, then check that the RHS is {}.
2949-
*
2950-
* However, we don't want the LHS evaluated, so just re-write it as an "exists" xlat?
2951-
*
2952-
* @todo - check lists for equality?
2993+
* We now parse the RHS, allowing a (perhaps different) cast on the RHS.
29532994
*/
2954-
if ((lhs->type == XLAT_TMPL) && tmpl_is_attr(lhs->vpt) && fr_type_is_structural(tmpl_attr_tail_da(lhs->vpt)->type)) {
2955-
if ((op != T_OP_CMP_EQ) && (op != T_OP_NE)) {
2956-
fr_strerror_printf("Invalid operator '%s' for left hand side structural attribute", fr_tokens[op]);
2957-
fr_sbuff_set(&our_in, &m_op);
2995+
XLAT_DEBUG(" recurse RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(&our_in), fr_sbuff_remaining(&our_in)));
2996+
if ((op == T_OP_REG_EQ) || (op == T_OP_REG_NE)) {
2997+
fr_type_t type;
2998+
2999+
/*
3000+
* @todo - LHS shouldn't be anything else.
3001+
*/
3002+
fr_assert(lhs->type == XLAT_TMPL);
3003+
3004+
type = tmpl_cast_get(lhs->vpt);
3005+
if ((type != FR_TYPE_NULL) && (type != FR_TYPE_STRING)) {
3006+
fr_strerror_const("Casts cannot be used with regular expressions");
3007+
fr_sbuff_set(&our_in, &m_lhs);
29583008
FR_SBUFF_ERROR_RETURN(&our_in);
29593009
}
29603010

2961-
fr_assert(0);
2962-
}
2963-
#endif
3011+
/*
3012+
* Cast the LHS to a string, if it's not already one!
3013+
*/
3014+
if (lhs->vpt->quote == T_BARE_WORD) tmpl_cast_set(lhs->vpt, FR_TYPE_STRING);
29643015

2965-
/*
2966-
* We now parse the RHS, allowing a (perhaps different) cast on the RHS.
2967-
*/
2968-
XLAT_DEBUG(" recurse RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(&our_in), fr_sbuff_remaining(&our_in)));
2969-
if ((op == T_OP_REG_EQ) || (op == T_OP_REG_NE)) {
29703016
slen = tokenize_regex_rhs(head, &rhs, &our_in, t_rules, bracket_rules);
29713017
} else {
29723018
slen = tokenize_expression(head, &rhs, &our_in, p_rules, t_rules, op, bracket_rules, input_rules, cond);

src/tests/unit/condition/base.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ condition &Event-Timestamp == 'January 1, 2012'
232232

233233
# literals are parsed when the conditions are parsed
234234
condition (integer)X == 1
235-
match ERROR offset 10: Failed parsing string as type 'uint32'
235+
match ERROR offset 10: Failed parsing input
236236

237237
#
238238
# @todo - parsing - there is no enum named "X"
@@ -266,7 +266,7 @@ condition (ether) 00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')"
266266
match (00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')")
267267

268268
condition (ether) 00:XX:22:33:44:55 == 00:11:22:33:44:55
269-
match ERROR offset 12: Missing separator, expected ':'
269+
match ERROR offset 9: enum values must contain at least one alpha character
270270

271271
#
272272
# Tests for boolean data types.
@@ -347,7 +347,7 @@ condition (ipaddr)127.0.0.1/32 == 127.0.0.1
347347
match true
348348

349349
condition (ipaddr)127.0.0.1/327 == 127.0.0.1
350-
match ERROR offset 9: Invalid IPv4 mask length "/327". Should be between 0-32
350+
match ERROR offset 9: enum values must contain at least one alpha character
351351

352352
condition (ipaddr)127.0.0.1/32 == 127.0.0.1
353353
match true
@@ -420,7 +420,6 @@ match ERROR offset 2: Invalid data type 'vsa' in cast
420420
condition (string)"foo" == &User-Name
421421
match ("foo" == &User-Name)
422422

423-
# This used to be expr, but expr isn't a builtin, so it failed...
424423
condition (integer)"%md4(' 1 + 1')" < &NAS-Port
425424
match ((uint32)"%md4(' 1 + 1')" < &NAS-Port)
426425

src/tests/unit/condition/ip.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ proto-dictionary radius
1010
condition &NAS-IPv6-Address == ::
1111
match (&NAS-IPv6-Address == ::)
1212

13-
condition &NAS-IP-Address == (ipaddr)*
13+
condition &NAS-IP-Address == (ipaddr)'*'
1414
match (&NAS-IP-Address == 0.0.0.0)
1515

1616
condition &NAS-IP-Address == 0.0.0.0

0 commit comments

Comments
 (0)