From 11432f309c7b35dbffb0f23b65b65b2cb0e8ee81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Wed, 22 Oct 2025 16:16:10 +0200 Subject: [PATCH] New lexer rules and udunits2 decodeInput function --- lib/scanner.l | 10 +++++++++- prog/udunits2.c | 42 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/lib/scanner.l b/lib/scanner.l index 456fe02..1c28a0f 100644 --- a/lib/scanner.l +++ b/lib/scanner.l @@ -140,7 +140,9 @@ from [Ff][Rr][Oo][Mm] since [Ss][Ii][Nn][Cc][Ee] ref [Rr][Ee][Ff] per [Pp][Ee][Rr] - +nanspell ([nN][aA][nN](\([^()]*\))?) +infspell ([iI][nN][fF]([iI][nN][iI][tT][yY])?) +idchar [A-Za-z0-9_] %Start ID_SEEN SHIFT_SEEN DATE_SEEN CLOCK_SEEN %% @@ -149,6 +151,12 @@ per [Pp][Ee][Rr] _restartScanner = 0; } +{sign}?{nanspell}{idchar} { yyless(0);} +{sign}?{infspell}{idchar} { yyless(0);} + +{sign}?{nanspell} { yyerror("NaN is not allowed in unit expressions."); return 0; } +{sign}?{infspell} { yyerror("Infinity is not allowed in unit expressions."); return 0; } + {space}*(@|{after}|{from}|{ref}|{since}){space}* { BEGIN SHIFT_SEEN; return SHIFT; diff --git a/prog/udunits2.c b/prog/udunits2.c index 94c0e0c..dd55942 100644 --- a/prog/udunits2.c +++ b/prog/udunits2.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifndef _MSC_VER #include @@ -423,11 +424,42 @@ decodeInput( ut_free(_haveUnit); - int nbytes; - if (sscanf(input, "%lg %n", &_haveUnitAmount, &nbytes) == 1) { - input += nbytes; - } - else { + int nbytes = 0; + double amt; + const char *p = input; + char *endp = NULL; + + errno = 0; + amt = strtod(p, &endp); + + if (endp != p) { + /* We did consume something that looks like a number? */ + int next = (unsigned char)*endp; + + /* If the numeric value is non-finite (NaN/Inf) ... */ + if (!isfinite(amt)) { + /* ... but the next char continues an identifier, then that was not an amount. 
+ Example: "nanosecond" -> "n" "a" "n" then "o" (identifier char).*/ + /* NOTE(review): the lexer's idchar class is [A-Za-z0-9_], but a digit after "nan"/"inf" is not treated as an identifier continuation here -- confirm this difference is intended. */ if (isalpha(next) || next == '_') { + /* Treat as: no leading number; let ut_parse() see the whole string. */ + _haveUnitAmount = 1; + /* DO NOT advance input. */ + } else { + /* Truly a standalone non-finite amount (e.g. "nan m", "inf s") -> reject */ + errMsg("NaN or Infinity is not allowed in unit expressions."); + return 0; + } + } else { + /* Finite amount is OK: Accept and advance to the remainder. */ + _haveUnitAmount = amt; + input = endp; + /* Skip all whitespace that follows the amount (not just one space), as the previous "%lg %n" sscanf() format did. */ + while (*input && isspace((unsigned char)*input)) { + ++input; + } + } + } else { + /* No leading number parsed at all. */ + _haveUnitAmount = 1; }