Skip to content

Commit db06e16

Browse files
committed
Improves date math expression parsing.
Refactors the DateMathPartParser to improve performance and accuracy when parsing Elasticsearch date math expressions:
- Optimizes explicit date parsing by using length-based format selection and a pre-compiled regex.
- Improves timezone handling and format ordering for better parsing.
- Adds comprehensive tests to ensure correctness.
1 parent 6c4049d commit db06e16

File tree

4 files changed

+135
-75
lines changed

4 files changed

+135
-75
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"cSpell.words": [
3+
"millis",
34
"timespan"
45
]
56
}

src/Exceptionless.DateTimeExtensions/FormatParsers/FormatParsers/PartParsers/DateMathPartParser.cs

Lines changed: 127 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ public class DateMathPartParser : IPartParser
2727
@"(?<operations>(?:[+\-/]\d*[yMwdhHms])*)$",
2828
RegexOptions.Compiled | RegexOptions.IgnoreCase);
2929

30+
// Pre-compiled regex for operation parsing to avoid repeated compilation
31+
private static readonly Regex _operationRegex = new(@"([+\-/])(\d*)([yMwdhHms])", RegexOptions.Compiled);
32+
3033
public Regex Regex => _parser;
3134

3235
public DateTimeOffset? Parse(Match match, DateTimeOffset relativeBaseTime, bool isUpperLimit)
@@ -54,13 +57,10 @@ public class DateMathPartParser : IPartParser
5457

5558
// Parse and apply operations
5659
string operations = match.Groups["operations"].Value;
57-
var result = ApplyOperations(baseTime, operations, isUpperLimit);
58-
59-
return result;
60+
return ApplyOperations(baseTime, operations, isUpperLimit);
6061
}
6162
catch
6263
{
63-
// Return null for any parsing errors to maintain robustness
6464
return null;
6565
}
6666
}
@@ -69,6 +69,8 @@ public class DateMathPartParser : IPartParser
6969
/// Attempts to parse an explicit date string with proper timezone handling.
7070
/// Supports various Elasticsearch-compatible date formats with optional timezone information.
7171
///
72+
/// Performance-optimized with length checks and format ordering by likelihood.
73+
///
7274
/// Timezone Behavior:
7375
/// - If timezone is specified (Z, +HH:MM, -HH:MM): Preserved from input
7476
/// - If no timezone specified: Uses the provided fallback offset
@@ -83,80 +85,148 @@ private static bool TryParseExplicitDate(string dateStr, TimeSpan offset, out Da
8385
{
8486
result = default;
8587

86-
if (string.IsNullOrEmpty(dateStr))
88+
if (String.IsNullOrEmpty(dateStr))
8789
return false;
8890

89-
// Try various formats that Elasticsearch supports
90-
string[] formats = {
91-
"yyyy-MM-dd",
92-
"yyyy-MM-ddTHH:mm:ss",
93-
"yyyy-MM-ddTHH:mm",
94-
"yyyy-MM-ddTHH",
95-
"yyyy-MM-ddTHH:mm:ssZ",
96-
"yyyy-MM-ddTHH:mm:ss.fff",
97-
"yyyy-MM-ddTHH:mm:ss.fffZ",
98-
"yyyy-MM-ddTHH:mm:sszzz",
99-
"yyyy-MM-ddTHH:mm:ss.fffzzz",
100-
"yyyy.MM.dd",
101-
"yyyy.MM.ddTHH:mm:ss",
102-
"yyyy.MM.ddTHH:mm",
103-
"yyyy.MM.ddTHH",
104-
"yyyy.MM.ddTHH:mm:ssZ",
105-
"yyyy.MM.ddTHH:mm:ss.fff",
106-
"yyyy.MM.ddTHH:mm:ss.fffZ",
107-
"yyyy.MM.ddTHH:mm:sszzz",
108-
"yyyy.MM.ddTHH:mm:ss.fffzzz",
109-
"yyyyMMdd",
110-
"yyyyMMddTHHmmss",
111-
"yyyyMMddTHHmm",
112-
"yyyyMMddTHH",
113-
"yyyyMMddTHHmmssZ",
114-
"yyyyMMddTHHmmss.fff",
115-
"yyyyMMddTHHmmss.fffZ",
116-
"yyyyMMddTHHmmsszzz",
117-
"yyyyMMddTHHmmss.fffzzz"
118-
};
91+
int len = dateStr.Length;
92+
93+
// Early exit for obviously invalid lengths
94+
if (len is < 4 or > 29) // Min: yyyy (4), Max: yyyy-MM-ddTHH:mm:ss.fffzzz (29)
95+
return false;
11996

120-
foreach (string format in formats)
97+
// Fast character validation for year digits
98+
if (!Char.IsDigit(dateStr[0]) || !Char.IsDigit(dateStr[1]) ||
99+
!Char.IsDigit(dateStr[2]) || !Char.IsDigit(dateStr[3]))
100+
return false;
101+
102+
// Detect timezone presence for smart format selection
103+
bool hasZ = dateStr[len - 1] == 'Z';
104+
bool hasTimezone = hasZ;
105+
if (!hasTimezone && len > 10) // Check for +/-HH:mm timezone format
121106
{
122-
// Handle timezone-aware formats differently from timezone-naive formats
123-
if (format.EndsWith("Z") || format.Contains("zzz"))
107+
for (int index = Math.Max(10, len - 6); index < len - 1; index++)
124108
{
125-
// Try parsing with timezone information preserved
126-
if (DateTimeOffset.TryParseExact(dateStr, format, CultureInfo.InvariantCulture,
127-
DateTimeStyles.None, out result))
109+
if (dateStr[index] is '+' or '-' && index + 1 < len && Char.IsDigit(dateStr[index + 1]))
128110
{
129-
return true;
111+
hasTimezone = true;
112+
break;
130113
}
131114
}
132-
else
115+
}
116+
117+
// Length-based format selection for maximum performance
118+
// Only try formats that match the exact length to avoid unnecessary parsing attempts
119+
switch (len)
120+
{
121+
case 4: // Built-in: year (yyyy)
122+
return TryParseWithFormat(dateStr, "yyyy", offset, false, out result);
123+
124+
case 7: // Built-in: year_month (yyyy-MM)
125+
if (dateStr[4] == '-')
126+
return TryParseWithFormat(dateStr, "yyyy-MM", offset, false, out result);
127+
break;
128+
129+
case 8: // Built-in: basic_date (yyyyMMdd)
130+
return TryParseWithFormat(dateStr, "yyyyMMdd", offset, false, out result);
131+
132+
case 10: // Built-in: date (yyyy-MM-dd)
133+
if (dateStr[4] == '-' && dateStr[7] == '-')
134+
return TryParseWithFormat(dateStr, "yyyy-MM-dd", offset, false, out result);
135+
break;
136+
137+
case 13: // Built-in: date_hour (yyyy-MM-ddTHH)
138+
if (dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T')
139+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH", offset, false, out result);
140+
break;
141+
142+
case 16: // Built-in: date_hour_minute (yyyy-MM-ddTHH:mm)
143+
if (dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T' && dateStr[13] == ':')
144+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm", offset, false, out result);
145+
break;
146+
147+
case 19: // Built-in: date_hour_minute_second (yyyy-MM-ddTHH:mm:ss)
148+
if (dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T' && dateStr[13] == ':' && dateStr[16] == ':')
149+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:ss", offset, false, out result);
150+
break;
151+
152+
case 20: // Built-in: date_time_no_millis (yyyy-MM-ddTHH:mm:ssZ)
153+
if (hasZ && dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T' && dateStr[13] == ':' && dateStr[16] == ':')
154+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:ssZ", offset, true, out result);
155+
break;
156+
157+
case 23: // Built-in: date_hour_minute_second_millis (yyyy-MM-ddTHH:mm:ss.fff)
158+
if (dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T' && dateStr[13] == ':' && dateStr[16] == ':' && dateStr[19] == '.')
159+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:ss.fff", offset, false, out result);
160+
break;
161+
162+
case 24: // Built-in: date_time (yyyy-MM-ddTHH:mm:ss.fffZ)
163+
if (hasZ && dateStr[4] == '-' && dateStr[7] == '-' && dateStr[10] == 'T' && dateStr[13] == ':' && dateStr[16] == ':' && dateStr[19] == '.')
164+
return TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:ss.fffZ", offset, true, out result);
165+
break;
166+
}
167+
168+
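// Handle numeric timezone offset formats such as +05:00. NOTE(review): compact forms like +0500 are mentioned as supported, but only the 'zzz' patterns are attempted below - confirm they are actually accepted.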
169+
// Note: .NET uses 'zzz' pattern for timezone offsets like +05:00
170+
if (hasTimezone && !hasZ)
171+
{
172+
// Only try timezone formats for lengths that make sense
173+
if (len is >= 25 and <= 29) // +05:00 variants
133174
{
134-
// For formats without timezone, parse as DateTime and treat as if already in target timezone
135-
if (DateTime.TryParseExact(dateStr, format, CultureInfo.InvariantCulture,
136-
DateTimeStyles.None, out DateTime dateTime))
175+
if (dateStr.Contains(".")) // with milliseconds
137176
{
138-
// Treat the parsed DateTime as if it's already in the target timezone
139-
// This avoids any conversion issues
140-
result = new DateTimeOffset(dateTime.Ticks, offset);
141-
return true;
177+
// Try: yyyy-MM-ddTHH:mm:ss.fff+05:00
178+
if (TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:ss.fffzzz", offset, true, out result))
179+
return true;
142180
}
143181
}
182+
183+
if (len is >= 22 and <= 25) // without milliseconds
184+
{
185+
// Try: yyyy-MM-ddTHH:mm:ss+05:00
186+
if (TryParseWithFormat(dateStr, "yyyy-MM-ddTHH:mm:sszzz", offset, true, out result))
187+
return true;
188+
}
189+
}
190+
191+
return false;
192+
}
193+
194+
/// <summary>
195+
/// Helper method to parse with a specific format, handling timezone appropriately.
196+
/// </summary>
197+
private static bool TryParseWithFormat(string dateStr, string format, TimeSpan offset, bool hasTimezone, out DateTimeOffset result)
198+
{
199+
result = default;
200+
201+
if (hasTimezone)
202+
{
203+
// Try parsing with timezone information preserved
204+
return DateTimeOffset.TryParseExact(dateStr, format, CultureInfo.InvariantCulture,
205+
DateTimeStyles.None, out result);
206+
}
207+
208+
// For formats without timezone, parse as DateTime and treat as if already in target timezone
209+
if (DateTime.TryParseExact(dateStr, format, CultureInfo.InvariantCulture,
210+
DateTimeStyles.None, out DateTime dateTime))
211+
{
212+
// Treat the parsed DateTime as if it's already in the target timezone
213+
result = new DateTimeOffset(dateTime.Ticks, offset);
214+
return true;
144215
}
145216

146217
return false;
147218
}
148219

149220
private static DateTimeOffset ApplyOperations(DateTimeOffset baseTime, string operations, bool isUpperLimit)
150221
{
151-
if (string.IsNullOrEmpty(operations))
222+
if (String.IsNullOrEmpty(operations))
152223
return baseTime;
153224

154225
var result = baseTime;
155-
var operationRegex = new Regex(@"([+\-/])(\d*)([yMwdhHms])", RegexOptions.Compiled);
156-
var matches = operationRegex.Matches(operations);
226+
var matches = _operationRegex.Matches(operations);
157227

158228
// Validate that all operations were matched properly
159-
var totalMatchLength = matches.Cast<Match>().Sum(m => m.Length);
229+
int totalMatchLength = matches.Cast<Match>().Sum(m => m.Length);
160230
if (totalMatchLength != operations.Length)
161231
{
162232
// If not all operations were matched, there are invalid operations
@@ -170,7 +240,7 @@ private static DateTimeOffset ApplyOperations(DateTimeOffset baseTime, string op
170240
string unit = opMatch.Groups[3].Value;
171241

172242
// Default amount is 1 if not specified
173-
int amount = string.IsNullOrEmpty(amountStr) ? 1 : int.Parse(amountStr);
243+
int amount = String.IsNullOrEmpty(amountStr) ? 1 : Int32.Parse(amountStr);
174244

175245
switch (operation)
176246
{
@@ -199,7 +269,7 @@ private static DateTimeOffset AddTimeUnit(DateTimeOffset dateTime, int amount, s
199269
"M" => dateTime.AddMonths(amount), // Capital M for months
200270
"m" => dateTime.AddMinutes(amount), // Lowercase m for minutes
201271
"w" => dateTime.AddDays(amount * 7),
202-
"d" => dateTime.AddDays(amount),
272+
"d" => dateTime.AddDays(amount), // Only lowercase d for days
203273
"h" or "H" => dateTime.AddHours(amount),
204274
"s" => dateTime.AddSeconds(amount),
205275
_ => throw new ArgumentException($"Invalid time unit: {unit}")
@@ -219,7 +289,7 @@ private static DateTimeOffset RoundToUnit(DateTimeOffset dateTime, string unit,
219289
"y" => isUpperLimit ? dateTime.EndOfYear() : dateTime.StartOfYear(),
220290
"M" => isUpperLimit ? dateTime.EndOfMonth() : dateTime.StartOfMonth(),
221291
"w" => isUpperLimit ? dateTime.EndOfWeek() : dateTime.StartOfWeek(),
222-
"d" => isUpperLimit ? dateTime.EndOfDay() : dateTime.StartOfDay(),
292+
"d" => isUpperLimit ? dateTime.EndOfDay() : dateTime.StartOfDay(), // Only lowercase d for days
223293
"h" or "H" => isUpperLimit ? dateTime.EndOfHour() : dateTime.StartOfHour(),
224294
"m" => isUpperLimit ? dateTime.EndOfMinute() : dateTime.StartOfMinute(),
225295
"s" => isUpperLimit ? dateTime.EndOfSecond() : dateTime.StartOfSecond(),

tests/Exceptionless.DateTimeExtensions.Tests/FormatParsers/PartParsers/DateMathPartParserTests.cs

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,13 @@ public static IEnumerable<object[]> ExplicitDateInputs
117117
var baseDate = new DateTimeOffset(2001, 2, 1, 0, 0, 0, _now.Offset);
118118

119119
return new[] {
120-
// Basic explicit date formats
120+
// Basic explicit date formats (officially supported by Elasticsearch)
121121
new object[] { "2001-02-01||", false, baseDate },
122122
["2001-02-01||", true, baseDate],
123-
["2001.02.01||", false, baseDate],
124-
["2001.02.01||", true, baseDate],
125123
["20010201||", false, baseDate],
126124
["20010201||", true, baseDate],
127125

128-
// With time components
126+
// With time components (ISO 8601 formats)
129127
["2001-02-01T12:30:45||", false, new DateTimeOffset(2001, 2, 1, 12, 30, 45, _now.Offset)],
130128
["2001-02-01T12:30:45||", true, new DateTimeOffset(2001, 2, 1, 12, 30, 45, _now.Offset)],
131129
["2001-02-01T12:30||", false, new DateTimeOffset(2001, 2, 1, 12, 30, 0, _now.Offset)],
@@ -141,9 +139,9 @@ public static IEnumerable<object[]> ExplicitDateInputs
141139
["2001-02-01||-1d", false, baseDate.AddDays(-1)],
142140
["2001-02-01||-1d", true, baseDate.AddDays(-1)],
143141

144-
// Complex example from Elasticsearch docs
145-
["2001.02.01||+1M/d", false, baseDate.AddMonths(1).StartOfDay()],
146-
["2001.02.01||+1M/d", true, baseDate.AddMonths(1).EndOfDay()],
142+
// With operations and rounding (basic_date format + operations)
143+
["20010201||+1M/d", false, baseDate.AddMonths(1).StartOfDay()],
144+
["20010201||+1M/d", true, baseDate.AddMonths(1).EndOfDay()],
147145

148146
// User's specific test case - UTC date with operations and rounding
149147
["2025-01-01T01:25:35Z||+3d/d", false, new DateTimeOffset(2025, 1, 4, 0, 0, 0, TimeSpan.Zero)],
@@ -169,17 +167,9 @@ public static IEnumerable<object[]> ExplicitDateInputs
169167
["2023-01-01T12:00:00.123+02:00||", false, new DateTimeOffset(2023, 1, 1, 12, 0, 0, 123, TimeSpan.FromHours(2))],
170168
["2023-01-01T12:00:00.123+02:00||", true, new DateTimeOffset(2023, 1, 1, 12, 0, 0, 123, TimeSpan.FromHours(2))],
171169

172-
// Different date separators
173-
["2023.06.15||", false, new DateTimeOffset(2023, 6, 15, 0, 0, 0, _now.Offset)],
174-
["2023.06.15||", true, new DateTimeOffset(2023, 6, 15, 0, 0, 0, _now.Offset)],
175-
["2023.06.15T10:30||", false, new DateTimeOffset(2023, 6, 15, 10, 30, 0, _now.Offset)],
176-
["2023.06.15T10:30||", true, new DateTimeOffset(2023, 6, 15, 10, 30, 0, _now.Offset)],
177-
178-
// Basic format variations
170+
// Basic format variations (yyyyMMdd is officially supported)
179171
["20230615||", false, new DateTimeOffset(2023, 6, 15, 0, 0, 0, _now.Offset)],
180-
["20230615||", true, new DateTimeOffset(2023, 6, 15, 0, 0, 0, _now.Offset)],
181-
["20230615T143000||", false, new DateTimeOffset(2023, 6, 15, 14, 30, 0, _now.Offset)],
182-
["20230615T143000||", true, new DateTimeOffset(2023, 6, 15, 14, 30, 0, _now.Offset)]
172+
["20230615||", true, new DateTimeOffset(2023, 6, 15, 0, 0, 0, _now.Offset)]
183173
};
184174
}
185175
}

tests/Exceptionless.DateTimeExtensions.Tests/FormatParsers/PartParsers/WildcardPartParserTests.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.Text.RegularExpressions;
43
using Exceptionless.DateTimeExtensions.FormatParsers.PartParsers;
54
using Microsoft.Extensions.Logging;
65
using Xunit;

0 commit comments

Comments
 (0)