Skip to content

Commit af8afee

Browse files
gerlowskija and claude authored
SOLR-13309: Introduce LongRangeField to expose Lucene 'LongRange' (#4192)
This commit adds a new field type, LongRangeField, that can be used to hold singular or multi-dimensional (up to 4) ranges of longs. LongRangeField is compatible with the previously added `{!numericRange}` and supports similar syntax. Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 4af06a9 commit af8afee

File tree

13 files changed

+1527
-229
lines changed

13 files changed

+1527
-229
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
title: Introduce new `LongRangeField` field type and (experimental) `{!numericRange}` query parser for storing and querying long ranges
2+
type: added
3+
authors:
4+
- name: Jason Gerlowski
5+
links:
6+
- name: SOLR-13309
7+
url: https://issues.apache.org/jira/browse/SOLR-13309
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.schema.numericrange;
18+
19+
import java.io.IOException;
20+
import java.util.ArrayList;
21+
import java.util.List;
22+
import java.util.Map;
23+
import java.util.regex.Pattern;
24+
import org.apache.lucene.document.StoredField;
25+
import org.apache.lucene.index.IndexableField;
26+
import org.apache.lucene.search.Query;
27+
import org.apache.lucene.search.SortField;
28+
import org.apache.solr.common.SolrException;
29+
import org.apache.solr.common.SolrException.ErrorCode;
30+
import org.apache.solr.response.TextResponseWriter;
31+
import org.apache.solr.schema.IndexSchema;
32+
import org.apache.solr.schema.PrimitiveFieldType;
33+
import org.apache.solr.schema.SchemaField;
34+
import org.apache.solr.search.QParser;
35+
import org.apache.solr.uninverting.UninvertingReader.Type;
36+
37+
/**
38+
* Abstract base class for numeric range field types that wrap Lucene's multi-dimensional range
39+
* fields (e.g., {@link org.apache.lucene.document.IntRange}, {@link
40+
* org.apache.lucene.document.LongRange}).
41+
*
42+
* <p>Provides common infrastructure for range field types including:
43+
*
44+
* <ul>
45+
* <li>Configurable number of dimensions (1–4) via the {@code numDimensions} schema attribute
46+
* <li>Shared regex patterns for parsing range value strings
47+
* <li>Standard field lifecycle methods (init, createFields, write, etc.)
48+
* </ul>
49+
*
50+
* <p>Concrete subclasses must implement {@link #parseRangeValue(String)} to parse the string
51+
* representation into a type-specific range value, and {@link #createField(SchemaField, Object)} to
52+
* produce the underlying Lucene {@link IndexableField}.
53+
*
54+
* @see IntRangeField
55+
* @see LongRangeField
56+
*/
57+
public abstract class AbstractNumericRangeField extends PrimitiveFieldType {
58+
59+
/**
60+
* Marker interface for parsed range value objects. Implemented by the inner {@code RangeValue}
61+
* classes of concrete subclasses so that {@link #toNativeType(Object)} can identify already-
62+
* parsed values without knowing the concrete type.
63+
*
64+
* <p>Concrete subclasses override {@link #parseRangeValue(String)} with a covariant return type
65+
* so callers within the subclass receive the concrete type directly (e.g. {@code
66+
* IntRangeField.RangeValue}) with no casting required.
67+
*/
68+
public interface NumericRangeValue {
69+
int getDimensions();
70+
}
71+
72+
/** Regex fragment matching a comma-separated list of signed integers (no decimal points). */
73+
protected static final String COMMA_DELIMITED_NUMS = "-?\\d+(?:\\s*,\\s*-?\\d+)*";
74+
75+
private static final String RANGE_PATTERN_STR =
76+
"\\[\\s*(" + COMMA_DELIMITED_NUMS + ")\\s+TO\\s+(" + COMMA_DELIMITED_NUMS + ")\\s*\\]";
77+
78+
/** Pre-compiled pattern matching {@code [min1,min2,... TO max1,max2,...]} range syntax. */
79+
protected static final Pattern RANGE_PATTERN_REGEX = Pattern.compile(RANGE_PATTERN_STR);
80+
81+
/** Pre-compiled pattern matching a single (multi-dimensional) bound, e.g. {@code 1,2,3}. */
82+
protected static final Pattern SINGLE_BOUND_PATTERN =
83+
Pattern.compile("^" + COMMA_DELIMITED_NUMS + "$");
84+
85+
/** Configured number of dimensions for this field type; defaults to 1. */
86+
protected int numDimensions = 1;
87+
88+
@Override
89+
protected boolean enableDocValuesByDefault() {
90+
return false; // Range fields do not support docValues
91+
}
92+
93+
@Override
94+
protected void init(IndexSchema schema, Map<String, String> args) {
95+
super.init(schema, args);
96+
97+
String numDimensionsStr = args.remove("numDimensions");
98+
if (numDimensionsStr != null) {
99+
numDimensions = Integer.parseInt(numDimensionsStr);
100+
if (numDimensions < 1 || numDimensions > 4) {
101+
throw new SolrException(
102+
ErrorCode.SERVER_ERROR,
103+
"numDimensions must be between 1 and 4, but was ["
104+
+ numDimensions
105+
+ "] for field type "
106+
+ typeName);
107+
}
108+
}
109+
110+
// Range fields do not support docValues - validate this wasn't explicitly enabled
111+
if (hasProperty(DOC_VALUES)) {
112+
throw new SolrException(
113+
ErrorCode.SERVER_ERROR,
114+
"docValues=true enabled but "
115+
+ getClass().getSimpleName()
116+
+ " does not support docValues for field type "
117+
+ typeName);
118+
}
119+
}
120+
121+
@Override
122+
public List<IndexableField> createFields(SchemaField field, Object value) {
123+
IndexableField indexedField = createField(field, value);
124+
List<IndexableField> fields = new ArrayList<>();
125+
126+
if (indexedField != null) {
127+
fields.add(indexedField);
128+
}
129+
130+
if (field.stored()) {
131+
fields.add(getStoredField(field, value.toString()));
132+
}
133+
134+
return fields;
135+
}
136+
137+
protected StoredField getStoredField(SchemaField sf, Object value) {
138+
return new StoredField(sf.getName(), value.toString());
139+
}
140+
141+
@Override
142+
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
143+
writer.writeStr(name, toExternal(f), false);
144+
}
145+
146+
@Override
147+
public SortField getSortField(SchemaField field, boolean top) {
148+
throw new SolrException(
149+
ErrorCode.BAD_REQUEST,
150+
"Cannot sort on " + getClass().getSimpleName() + ": " + field.getName());
151+
}
152+
153+
@Override
154+
public Type getUninversionType(SchemaField sf) {
155+
return null; // No field cache support
156+
}
157+
158+
@Override
159+
public String toInternal(String val) {
160+
// Validate format and return as-is
161+
parseRangeValue(val);
162+
return val;
163+
}
164+
165+
@Override
166+
public String toExternal(IndexableField f) {
167+
return f.stringValue();
168+
}
169+
170+
@Override
171+
public Object toNativeType(Object val) {
172+
if (val == null) return null;
173+
if (val instanceof NumericRangeValue) return val;
174+
return parseRangeValue(val.toString());
175+
}
176+
177+
/**
178+
* Parse a range value string into a type-specific range value object.
179+
*
180+
* <p>Implementations should accept the {@code [min1,min2,... TO max1,max2,...]} bracket notation
181+
* (using {@link #RANGE_PATTERN_REGEX}) and validate that:
182+
*
183+
* <ul>
184+
* <li>The format matches the expected pattern
185+
* <li>The number of dimensions in the value matches {@link #numDimensions}
186+
* <li>Each min value is less than or equal to the corresponding max value
187+
* </ul>
188+
*
189+
* <p>Subclasses should override this with a covariant return type (their concrete inner {@code
190+
* RangeValue} class) so that internal callers receive the fully-typed value without casting.
191+
*
192+
* @param value the string value in bracket notation
193+
* @return a {@link NumericRangeValue} holding the parsed min/max arrays
194+
* @throws SolrException if value format is invalid
195+
*/
196+
public abstract NumericRangeValue parseRangeValue(String value);
197+
198+
/**
199+
* Parses a single N-dimensional point expressed as a comma-separated string (e.g. {@code "5"} or
200+
* {@code "5,10"}) into a {@link NumericRangeValue} where both mins and maxs are set to the parsed
201+
* bound.
202+
*
203+
* <p>This is used by {@link #getFieldQuery} to support the "single bound" query shorthand, where
204+
* a bare coordinate is treated as a degenerate range {@code [p TO p]}. Dimension-count validation
205+
* against {@link #numDimensions} is performed by the caller and does not need to be repeated
206+
* here.
207+
*
208+
* <p>Subclasses should override with a covariant return type so that internal callers receive the
209+
* concrete {@code RangeValue} type without casting.
210+
*
211+
* @param value a comma-separated numeric string (e.g. {@code "5,10"} for a 2D point)
212+
* @return a {@link NumericRangeValue} with mins and maxs both equal to the parsed bound
213+
* @throws SolrException if the string contains non-numeric values
214+
*/
215+
public abstract NumericRangeValue parseSingleBound(String value);
216+
217+
/**
218+
* Creates a Lucene query that matches indexed documents whose stored range <em>contains</em> the
219+
* query range described by {@code rangeValue}.
220+
*
221+
* <p>This is the default query semantics used by {@link #getFieldQuery}. Queries with other match
222+
* semantics (intersects, within, crosses) are available via {@link
223+
* org.apache.solr.search.numericrange.NumericRangeQParserPlugin}.
224+
*
225+
* <p>The {@code rangeValue} argument may originate from either {@link #parseRangeValue} (full
226+
* {@code [min TO max]} syntax) or {@link #parseSingleBound} (point query shorthand). In the point
227+
* case, mins and maxs are equal, so the query finds documents whose range contains that exact
228+
* point.
229+
*
230+
* @param field the name of the field to query
231+
* @param rangeValue a pre-parsed range value produced by this field type
232+
* @return a contains query for the given field and range
233+
*/
234+
public abstract Query newContainsQuery(String field, NumericRangeValue rangeValue);
235+
236+
/**
237+
* Creates a Lucene query that matches indexed documents whose stored range <em>intersects</em>
238+
* the query range described by {@code rangeValue}.
239+
*
240+
* @param field the name of the field to query
241+
* @param rangeValue a pre-parsed range value produced by this field type
242+
* @return an intersects query for the given field and range
243+
*/
244+
public abstract Query newIntersectsQuery(String field, NumericRangeValue rangeValue);
245+
246+
/**
247+
* Creates a Lucene query that matches indexed documents whose stored range is <em>within</em> the
248+
* query range described by {@code rangeValue}.
249+
*
250+
* @param field the name of the field to query
251+
* @param rangeValue a pre-parsed range value produced by this field type
252+
* @return a within query for the given field and range
253+
*/
254+
public abstract Query newWithinQuery(String field, NumericRangeValue rangeValue);
255+
256+
/**
257+
* Creates a Lucene query that matches indexed documents whose stored range <em>crosses</em> the
258+
* boundaries of the query range described by {@code rangeValue}.
259+
*
260+
* @param field the name of the field to query
261+
* @param rangeValue a pre-parsed range value produced by this field type
262+
* @return a crosses query for the given field and range
263+
*/
264+
public abstract Query newCrossesQuery(String field, NumericRangeValue rangeValue);
265+
266+
/**
267+
* Creates a query for this field that matches docs where the query-range is fully contained by
268+
* the field value.
269+
*
270+
* <p>Queries requiring other match semantics can use {@link
271+
* org.apache.solr.search.numericrange.NumericRangeQParserPlugin}
272+
*
273+
* @param parser The {@link org.apache.solr.search.QParser} calling the method
274+
* @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
275+
* @param externalVal The String representation of the value to search. Supports both a
276+
* (multi-)dimensional range of the form [1,2 TO 3,4], or a single (multi-)dimensional bound
277+
* (e.g. 1,2). In the latter case, the single bound will be used as both the min and max. Both
278+
* formats use "contains" query semantics to find indexed ranges that contain the query range.
279+
* @return Query for this field using contains semantics
280+
*/
281+
@Override
282+
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
283+
if (externalVal == null || externalVal.trim().isEmpty()) {
284+
throw new SolrException(ErrorCode.BAD_REQUEST, "Query value cannot be null or empty");
285+
}
286+
287+
String trimmed = externalVal.trim();
288+
289+
// Check if it's the full range syntax: [min1,min2 TO max1,max2]
290+
if (RANGE_PATTERN_REGEX.matcher(trimmed).matches()) {
291+
final var rangeValue = parseRangeValue(trimmed);
292+
return newContainsQuery(field.getName(), rangeValue);
293+
}
294+
295+
// Syntax sugar: also accept a single-bound (i.e pX,pY,pZ)
296+
if (SINGLE_BOUND_PATTERN.matcher(trimmed).matches()) {
297+
final var singleBoundRange = parseSingleBound(trimmed);
298+
299+
if (singleBoundRange.getDimensions() != numDimensions) {
300+
throw new SolrException(
301+
ErrorCode.BAD_REQUEST,
302+
"Single bound dimensions ("
303+
+ singleBoundRange.getDimensions()
304+
+ ") do not match field type numDimensions ("
305+
+ numDimensions
306+
+ ")");
307+
}
308+
309+
return newContainsQuery(field.getName(), singleBoundRange);
310+
}
311+
312+
throw new SolrException(
313+
ErrorCode.BAD_REQUEST,
314+
"Invalid query format. Expected either a range [min TO max] or a single bound to search for, got: "
315+
+ externalVal);
316+
}
317+
}

0 commit comments

Comments
 (0)