You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
feat: Support field:(<term>...) Lucene searches (#1315)
# Summary
This PR updates HyperDX's Lucene support to include parenthesized field searches of the form `<field>:(<term>...)`.
Prior to these changes, HyperDX would ignore the `<field>` entirely and search as if the query were just `<term>...`.
With these changes, the search is performed just like a `<term>...` search except:
1. The `field` is used for the search, instead of the implicit field expression (e.g., `Body` for `otel_logs`).
2. The search is performed without `hasToken()`, as we assume that fields do not have bloom filters set up (matching the current behavior for how we search fields).
This support has the added benefit of unlocking multi-token substring searches (Ref HDX-1931)
- Previously, you could not search a field for a substring with multiple tokens. For example, `error.message:*Method not allowed*` is interpreted as 3 separate terms, and only `*Method` would be associated with `error.message`, while `error.message:"Method not allowed"` and `error.message:"*Method not allowed*"` look for exact matches instead of substrings.
- Now, this can be accomplished with `error.message:("Method not allowed")`. This matches the current behavior of a search like `"Method not allowed"`, which would search the source's default implicit column (eg. `Body`) for the substring "Method not allowed".
## Testing
To test these changes, this PR adds a few dozen query parser unit test cases.
"(dynamicType(`serviceName`.`lte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`lte`.`test` <= '40')",
105
+
"(dynamicType(`ResourceAttributesJSON`.`lte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`lte`.`test` <= '40')",
94
106
);
95
107
});
96
108
97
109
it('compare - gt',async()=>{
98
-
constgtField='serviceName.gt.test';
110
+
constgtField='ResourceAttributesJSON.gt.test';
99
111
constgtTerm='70';
100
-
constgt=awaitserializer.gt(gtField,gtTerm);
112
+
constgt=awaitserializer.gt(gtField,gtTerm,{});
101
113
expect(gt).toBe(
102
-
"(dynamicType(`serviceName`.`gt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`gt`.`test` > '70')",
114
+
"(dynamicType(`ResourceAttributesJSON`.`gt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`gt`.`test` > '70')",
103
115
);
104
116
});
105
117
106
118
it('compare - lt',async()=>{
107
-
constltField='serviceName.lt.test';
119
+
constltField='ResourceAttributesJSON.lt.test';
108
120
constltTerm='2';
109
-
constlt=awaitserializer.lt(ltField,ltTerm);
121
+
constlt=awaitserializer.lt(ltField,ltTerm,{});
110
122
expect(lt).toBe(
111
-
"(dynamicType(`serviceName`.`lt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`lt`.`test` < '2')",
123
+
"(dynamicType(`ResourceAttributesJSON`.`lt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`lt`.`test` < '2')",
112
124
);
113
125
});
126
+
127
+
consttestCases=[
128
+
{
129
+
lucene: '"foo bar baz"',
130
+
sql: "((hasToken(lower(Body), lower('foo')) AND hasToken(lower(Body), lower('bar')) AND hasToken(lower(Body), lower('baz')) AND (lower(Body) LIKE lower('%foo bar baz%'))))",
131
+
english: 'event has whole word "foo bar baz"',
132
+
},
133
+
{
134
+
lucene: 'foo bar baz',
135
+
sql: "((hasToken(lower(Body), lower('foo'))) AND (hasToken(lower(Body), lower('bar'))) AND (hasToken(lower(Body), lower('baz'))))",
136
+
english:
137
+
'event has whole word foo AND event has whole word bar AND event has whole word baz',
138
+
},
139
+
{
140
+
lucene: 'ServiceName:foo bar baz',
141
+
sql: "((ServiceName ILIKE '%foo%') AND (hasToken(lower(Body), lower('bar'))) AND (hasToken(lower(Body), lower('baz'))))",
142
+
english:
143
+
"'ServiceName' contains foo AND event has whole word bar AND event has whole word baz",
144
+
},
145
+
{
146
+
lucene: 'ServiceName:"foo bar baz"',
147
+
sql: "((ServiceName = 'foo bar baz'))",
148
+
english: "'ServiceName' is foo bar baz",
149
+
},
150
+
{
151
+
lucene: 'ServiceName:("foo bar baz")',
152
+
sql: "(((ServiceName ILIKE '%foo bar baz%')))",
153
+
english: '(ServiceName contains "foo bar baz")',
154
+
},
155
+
{
156
+
lucene: 'ServiceName:(abc def)',
157
+
sql: "(((ServiceName ILIKE '%abc%') AND (ServiceName ILIKE '%def%')))",
158
+
english: '(ServiceName contains abc AND ServiceName contains def)',
159
+
},
160
+
{
161
+
lucene: '(abc def)',
162
+
sql: "(((hasToken(lower(Body), lower('abc'))) AND (hasToken(lower(Body), lower('def')))))",
163
+
english: '(event has whole word abc AND event has whole word def)',
164
+
},
165
+
{
166
+
lucene: '("abc def")',
167
+
sql: "(((hasToken(lower(Body), lower('abc')) AND hasToken(lower(Body), lower('def')) AND (lower(Body) LIKE lower('%abc def%')))))",
168
+
english: '(event has whole word "abc def")',
169
+
},
170
+
{
171
+
lucene: 'foo:bar',
172
+
sql: "((foo ILIKE '%bar%'))",
173
+
english: "'foo' contains bar",
174
+
},
175
+
{
176
+
lucene: '(foo:bar)',
177
+
sql: "(((foo ILIKE '%bar%')))",
178
+
english: "('foo' contains bar)",
179
+
},
180
+
{
181
+
lucene: 'bar',
182
+
sql: "((hasToken(lower(Body), lower('bar'))))",
183
+
english: 'event has whole word bar',
184
+
},
185
+
{
186
+
lucene: '(bar)',
187
+
sql: "(((hasToken(lower(Body), lower('bar')))))",
188
+
english: '(event has whole word bar)',
189
+
},
190
+
{
191
+
lucene: 'foo:(bar)',
192
+
sql: "(((foo ILIKE '%bar%')))",
193
+
english: '(foo contains bar)',
194
+
},
195
+
{
196
+
lucene: 'foo:(bar) baz',
197
+
sql: "(((foo ILIKE '%bar%')) AND (hasToken(lower(Body), lower('baz'))))",
198
+
english: '(foo contains bar) AND event has whole word baz',
199
+
},
200
+
{
201
+
lucene: 'LogAttributes.error.message:("Failed to fetch")',
202
+
sql: "(((`LogAttributes`['error.message'] ILIKE '%Failed to fetch%')))",
203
+
english: '(LogAttributes.error.message contains "Failed to fetch")',
204
+
},
205
+
{
206
+
lucene: 'ResourceAttributesJSON.error.message:("Failed to fetch")',
207
+
sql: "(((toString(`ResourceAttributesJSON`.`error`.`message`) ILIKE '%Failed to fetch%')))",
208
+
english:
209
+
'(ResourceAttributesJSON.error.message contains "Failed to fetch")',
"((hasToken(lower(concatWithSeparator(';',Body,OtherColumn)), lower('foo'))) AND (hasToken(lower(concatWithSeparator(';',Body,OtherColumn)), lower('bar'))))";
0 commit comments