Skip to content

Commit 45adb11

Browse files
authored
SOLR-18071: Support stored fields in ExportWriter (#4053)
via a new includeStoredFields parameter
1 parent 9d9ac0f commit 45adb11

File tree

14 files changed

+633
-64
lines changed

14 files changed

+633
-64
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ This file lists Solr's raw release notes with details of every change to Solr. M
3333
- Added efSearch parameter to knn query, exposed efSearchScaleFactor that is used to calculate efSearch internally #17928 [SOLR-17928](https://issues.apache.org/jira/browse/SOLR-17928) (Puneet Ahuja) (Elia Porciani)
3434
- Support indexing primitive float[] values for DenseVectorField via JavaBin [SOLR-17948](https://issues.apache.org/jira/browse/SOLR-17948) (Puneet Ahuja) (Noble Paul)
3535
- Enable MergeOnFlushMergePolicy in Solr [SOLR-17984](https://issues.apache.org/jira/browse/SOLR-17984) ([Houston Putman](https://home.apache.org/phonebook.html?uid=houston) @HoustonPutman)
36+
- Add support for stored-only fields in ExportWriter via the new includeStoredFields=true parameter. The parameter defaults to false because reading stored fields can negatively impact export performance. [SOLR-18071](https://issues.apache.org/jira/browse/SOLR-18071) (Luke Kot-Zaniewski)
3637

3738
### Changed (30 changes)
3839

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Support including stored fields in ExportWriter output.
3+
type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: Luke Kot-Zaniewski
6+
links:
7+
- name: SOLR-18071
8+
url: https://issues.apache.org/jira/browse/SOLR-18071

solr/core/src/java/org/apache/solr/handler/export/DoubleFieldWriter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,15 @@ public DoubleFieldWriter(
3434
}
3535

3636
@Override
37-
public boolean write(
38-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex)
37+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew)
3938
throws IOException {
4039
double val;
4140
SortValue sortValue = sortDoc.getSortValue(this.field);
4241
if (sortValue != null) {
4342
if (sortValue.isPresent()) {
4443
val = (double) sortValue.getCurrentValue();
4544
} else { // empty-value
46-
return false;
45+
return;
4746
}
4847
} else {
4948
// field is not part of 'sort' param, but part of 'fl' param
@@ -53,10 +52,9 @@ public boolean write(
5352
if (vals != null) {
5453
val = Double.longBitsToDouble(vals.longValue());
5554
} else {
56-
return false;
55+
return;
5756
}
5857
}
5958
ew.put(this.field, val);
60-
return true;
6159
}
6260
}

solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java

Lines changed: 66 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@
2828
import java.lang.invoke.MethodHandles;
2929
import java.nio.charset.StandardCharsets;
3030
import java.util.ArrayList;
31+
import java.util.LinkedHashMap;
32+
import java.util.LinkedHashSet;
3133
import java.util.List;
3234
import java.util.Map;
35+
import java.util.Set;
3336
import java.util.TreeSet;
3437
import org.apache.lucene.index.LeafReader;
3538
import org.apache.lucene.index.LeafReaderContext;
@@ -99,15 +102,15 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
99102

100103
public static final String BATCH_SIZE_PARAM = "batchSize";
101104
public static final String QUEUE_SIZE_PARAM = "queueSize";
105+
public static final String INCLUDE_STORED_FIELDS_PARAM = "includeStoredFields";
102106

103107
public static final int DEFAULT_BATCH_SIZE = 30000;
104108
public static final int DEFAULT_QUEUE_SIZE = 150000;
105109
private static final FieldWriter EMPTY_FIELD_WRITER =
106110
new FieldWriter() {
107111
@Override
108-
public boolean write(
109-
SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out, int fieldIndex) {
110-
return false;
112+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, EntryWriter out) {
113+
// do nothing
111114
}
112115
};
113116

@@ -482,45 +485,72 @@ void writeDoc(
482485
throws IOException {
483486
int ord = sortDoc.ord;
484487
LeafReaderContext context = leaves.get(ord);
485-
int fieldIndex = 0;
486488
for (FieldWriter fieldWriter : writers) {
487-
if (fieldWriter.write(sortDoc, context, ew, fieldIndex)) {
488-
++fieldIndex;
489-
}
489+
fieldWriter.write(sortDoc, context, ew);
490490
}
491491
}
492492

493493
public List<FieldWriter> getFieldWriters(String[] fields, SolrQueryRequest req)
494494
throws IOException {
495495
DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(req.getSearcher(), false);
496-
497496
SolrReturnFields solrReturnFields = new SolrReturnFields(fields, req);
497+
boolean includeStoredFields = req.getParams().getBool(INCLUDE_STORED_FIELDS_PARAM, false);
498498

499499
List<FieldWriter> writers = new ArrayList<>();
500+
Set<SchemaField> docValueFields = new LinkedHashSet<>();
501+
Map<String, SchemaField> storedFields = new LinkedHashMap<>();
502+
500503
for (String field : req.getSearcher().getFieldNames()) {
501504
if (!solrReturnFields.wantsField(field)) {
502505
continue;
503506
}
504507
SchemaField schemaField = req.getSchema().getField(field);
505-
if (!schemaField.hasDocValues()) {
506-
throw new IOException(schemaField + " must have DocValues to use this feature.");
507-
}
508-
boolean multiValued = schemaField.multiValued();
509508
FieldType fieldType = schemaField.getType();
510-
FieldWriter writer;
511509

512-
if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) {
513-
if (solrReturnFields.getRequestedFieldNames() != null
514-
&& solrReturnFields.getRequestedFieldNames().contains(field)) {
515-
// Explicitly requested field cannot be used due to not having useDocValuesAsStored=true,
516-
// throw exception
510+
Set<String> requestFieldNames =
511+
solrReturnFields.getRequestedFieldNames() == null
512+
? Set.of()
513+
: solrReturnFields.getRequestedFieldNames();
514+
515+
if (canUseDocValues(schemaField, fieldType)) {
516+
// Prefer DocValues when available
517+
docValueFields.add(schemaField);
518+
} else if (schemaField.stored()) {
519+
// Field is stored-only (no usable DocValues)
520+
if (includeStoredFields) {
521+
storedFields.put(field, schemaField);
522+
} else if (requestFieldNames.contains(field)) {
523+
// Explicitly requested field without DocValues and includeStoredFields=false
524+
throw new IOException(
525+
schemaField
526+
+ " must have DocValues to use this feature. "
527+
+ "Try setting includeStoredFields=true to retrieve this field from stored values.");
528+
}
529+
// Else: glob matched stored-only field without includeStoredFields - silently skip
530+
} else if (requestFieldNames.contains(field)) {
531+
// Explicitly requested field that has neither DocValues nor stored
532+
if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) {
517533
throw new IOException(
518534
schemaField + " Must have useDocValuesAsStored='true' to be used with export writer");
519535
} else {
520-
// Glob pattern matched field cannot be used due to not having useDocValuesAsStored=true
521-
continue;
536+
throw new IOException(
537+
schemaField + " must have DocValues or be stored to use this feature.");
522538
}
523539
}
540+
// Else: glob matched field with neither DocValues nor stored - silently skip
541+
}
542+
543+
for (SchemaField schemaField : docValueFields) {
544+
String field = schemaField.getName();
545+
boolean multiValued = schemaField.multiValued();
546+
FieldType fieldType = schemaField.getType();
547+
FieldWriter writer;
548+
549+
if (schemaField.stored() && !storedFields.isEmpty()) {
550+
// if we're reading StoredFields *anyway*, then we might as well avoid this extra DV lookup
551+
storedFields.put(field, schemaField);
552+
continue;
553+
}
524554

525555
DocValuesIteratorCache.FieldDocValuesSupplier docValuesCache = dvIterCache.getSupplier(field);
526556

@@ -574,9 +604,24 @@ public List<FieldWriter> getFieldWriters(String[] fields, SolrQueryRequest req)
574604
}
575605
writers.add(writer);
576606
}
607+
608+
if (!storedFields.isEmpty()) {
609+
writers.add(new StoredFieldsWriter(storedFields));
610+
}
611+
577612
return writers;
578613
}
579614

615+
private static boolean canUseDocValues(SchemaField schemaField, FieldType fieldType) {
616+
return schemaField.hasDocValues()
617+
// Special handling for SortableTextField: unlike other field types, it requires
618+
// useDocValuesAsStored=true to be included via glob patterns in /export. This
619+
// matches the behavior of /select (which requires useDocValuesAsStored=true for
620+
// all globbed fields) and avoids performance issues. The requirement cannot be
621+
// extended to other field types in /export for backward compatibility reasons.
622+
&& (!(fieldType instanceof SortableTextField) || schemaField.useDocValuesAsStored());
623+
}
624+
580625
SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IOException {
581626
SortValue[] sortValues = new SortValue[sortFields.length];
582627
IndexSchema schema = searcher.getSchema();
@@ -591,7 +636,7 @@ SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IO
591636
throw new IOException(field + " must have DocValues to use this feature.");
592637
}
593638

594-
if (ft instanceof SortableTextField && schemaField.useDocValuesAsStored() == false) {
639+
if (ft instanceof SortableTextField && !schemaField.useDocValuesAsStored()) {
595640
throw new IOException(
596641
schemaField + " Must have useDocValuesAsStored='true' to be used with export writer");
597642
}

solr/core/src/java/org/apache/solr/handler/export/FieldWriter.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,15 @@
2222
import org.apache.solr.common.MapWriter;
2323

2424
abstract class FieldWriter {
25-
public abstract boolean write(
26-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex)
25+
/**
26+
* Writes field values from the document to the output.
27+
*
28+
* @param sortDoc the document being exported
29+
* @param readerContext the leaf reader context for accessing field values
30+
* @param out the output writer to write field values to
31+
* @throws IOException if an I/O error occurs while reading or writing field values
32+
*/
33+
public abstract void write(
34+
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out)
2735
throws IOException;
2836
}

solr/core/src/java/org/apache/solr/handler/export/FloatFieldWriter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,15 @@ public FloatFieldWriter(
3434
}
3535

3636
@Override
37-
public boolean write(
38-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex)
37+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew)
3938
throws IOException {
4039
float val;
4140
SortValue sortValue = sortDoc.getSortValue(this.field);
4241
if (sortValue != null) {
4342
if (sortValue.isPresent()) {
4443
val = (float) sortValue.getCurrentValue();
4544
} else { // empty-value
46-
return false;
45+
return;
4746
}
4847
} else {
4948
// field is not part of 'sort' param, but part of 'fl' param
@@ -53,10 +52,9 @@ public boolean write(
5352
if (vals != null) {
5453
val = Float.intBitsToFloat((int) vals.longValue());
5554
} else {
56-
return false;
55+
return;
5756
}
5857
}
5958
ew.put(this.field, val);
60-
return true;
6159
}
6260
}

solr/core/src/java/org/apache/solr/handler/export/IntFieldWriter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,15 @@ public IntFieldWriter(
3434
}
3535

3636
@Override
37-
public boolean write(
38-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex)
37+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew)
3938
throws IOException {
4039
int val;
4140
SortValue sortValue = sortDoc.getSortValue(this.field);
4241
if (sortValue != null) {
4342
if (sortValue.isPresent()) {
4443
val = (int) sortValue.getCurrentValue();
4544
} else { // empty-value
46-
return false;
45+
return;
4746
}
4847
} else {
4948
// field is not part of 'sort' param, but part of 'fl' param
@@ -53,10 +52,9 @@ public boolean write(
5352
if (vals != null) {
5453
val = (int) vals.longValue();
5554
} else {
56-
return false;
55+
return;
5756
}
5857
}
5958
ew.put(this.field, val);
60-
return true;
6159
}
6260
}

solr/core/src/java/org/apache/solr/handler/export/LongFieldWriter.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,15 @@ public LongFieldWriter(
3535
}
3636

3737
@Override
38-
public boolean write(
39-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew, int fieldIndex)
38+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter ew)
4039
throws IOException {
4140
long val;
4241
SortValue sortValue = sortDoc.getSortValue(this.field);
4342
if (sortValue != null) {
4443
if (sortValue.isPresent()) {
4544
val = (long) sortValue.getCurrentValue();
4645
} else { // empty-value
47-
return false;
46+
return;
4847
}
4948
} else {
5049
// field is not part of 'sort' param, but part of 'fl' param
@@ -54,11 +53,10 @@ public boolean write(
5453
if (vals != null) {
5554
val = vals.longValue();
5655
} else {
57-
return false;
56+
return;
5857
}
5958
}
6059
doWrite(ew, val);
61-
return true;
6260
}
6361

6462
protected void doWrite(MapWriter.EntryWriter ew, long val) throws IOException {

solr/core/src/java/org/apache/solr/handler/export/MultiFieldWriter.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,14 @@ public MultiFieldWriter(
6161
}
6262

6363
@Override
64-
public boolean write(
65-
SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out, int fieldIndex)
64+
public void write(SortDoc sortDoc, LeafReaderContext readerContext, MapWriter.EntryWriter out)
6665
throws IOException {
6766
if (this.fieldType.isPointField()) {
6867
SortedNumericDocValues vals =
6968
docValuesCache.getSortedNumericDocValues(
7069
sortDoc.docId, readerContext.reader(), readerContext.ord);
7170
if (vals == null) {
72-
return false;
71+
return;
7372
}
7473

7574
final SortedNumericDocValues docVals = vals;
@@ -82,13 +81,12 @@ public boolean write(
8281
w.add(bitsToValue.apply(docVals.nextValue()));
8382
}
8483
});
85-
return true;
8684
} else {
8785
SortedSetDocValues vals =
8886
docValuesCache.getSortedSetDocValues(
8987
sortDoc.docId, readerContext.reader(), readerContext.ord);
9088
if (vals == null) {
91-
return false;
89+
return;
9290
}
9391

9492
final SortedSetDocValues docVals = vals;
@@ -105,7 +103,6 @@ public boolean write(
105103
else w.add(fieldType.toObject(f));
106104
}
107105
});
108-
return true;
109106
}
110107
}
111108

0 commit comments

Comments
 (0)