Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,17 @@
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.common.text;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
package org.elasticsearch.xcontent;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

/**
* Both {@link String} and {@link BytesReference} representation of the text. Starts with one of those, and if
* Both {@link String} and {@link ByteBuffer} representation of the text. Starts with one of those, and if
* the other is requested, caches the other one in a local reference so no additional conversion will be needed.
*/
public final class Text implements Comparable<Text>, ToXContentFragment {
public final class Text implements XContentString, Comparable<Text>, ToXContentFragment {

public static final Text[] EMPTY_ARRAY = new Text[0];

Expand All @@ -36,31 +31,35 @@ public static Text[] convertFromStringArray(String[] strings) {
return texts;
}

private BytesReference bytes;
private ByteBuffer bytes;
private String text;
private int hash;
private int length = -1;

public Text(ByteBuffer bytes) {
this.bytes = bytes;
}

public Text(BytesReference bytes) {
public Text(ByteBuffer bytes, int length) {
this.bytes = bytes;
this.length = length;
}

public Text(String text) {
this.text = text;
}

/**
* Whether a {@link BytesReference} view of the data is already materialized.
* Whether a {@link ByteBuffer} view of the data is already materialized.
*/
public boolean hasBytes() {
return bytes != null;
}

/**
* Returns a {@link BytesReference} view of the data.
*/
public BytesReference bytes() {
@Override
public ByteBuffer bytes() {
if (bytes == null) {
bytes = new BytesArray(text.getBytes(StandardCharsets.UTF_8));
bytes = StandardCharsets.UTF_8.encode(text);
}
return bytes;
}
Expand All @@ -72,11 +71,20 @@ public boolean hasString() {
return text != null;
}

/**
* Returns a {@link String} view of the data.
*/
@Override
public String string() {
return text == null ? bytes.utf8ToString() : text;
if (text == null) {
text = StandardCharsets.UTF_8.decode(bytes).toString();
}
return text;
}

@Override
public int length() {
if (length < 0) {
length = string().length();
}
return length;
}

@Override
Expand Down Expand Up @@ -115,8 +123,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
} else {
// TODO: TextBytesOptimization we can use a buffer here to convert it? maybe add a
// request to jackson to support InputStream as well?
BytesRef br = this.bytes().toBytesRef();
return builder.utf8Value(br.bytes, br.offset, br.length);
assert bytes.hasArray();
return builder.utf8Value(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent;

import java.nio.ByteBuffer;

/**
* A piece of text that can be viewed either as a {@link String} or as UTF-8 encoded bytes
* held in a {@link ByteBuffer}.
*/
public interface XContentString {
/**
* Returns a {@link String} view of the data.
*
* @return the text as a {@link String}
*/
String string();

/**
* Returns a UTF8-encoded {@link ByteBuffer} view of the data.
* <p>
* NOTE(review): a {@link ByteBuffer} carries a mutable position and limit. This contract does not say
* whether callers may consume the returned buffer or must read it through {@link ByteBuffer#duplicate()};
* confirm and document the expectation, since relative reads and charset decoding advance the position.
*
* @return the UTF-8 encoded bytes of the text
*/
ByteBuffer bytes();

/**
* Returns the number of characters in the represented string.
* <p>
* NOTE(review): presumably this means {@link String#length()} semantics (UTF-16 code units) rather than
* code points or encoded byte count; confirm against implementations.
*
* @return the character count of the text
*/
int length();
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.CountDown;
Expand All @@ -56,6 +55,7 @@
import org.elasticsearch.tasks.TaskManager;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.Text;

import java.util.ArrayList;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;

import java.io.EOFException;
import java.io.FilterInputStream;
Expand Down Expand Up @@ -391,13 +391,20 @@ public Text readOptionalText() throws IOException {
if (length == -1) {
return null;
}
return new Text(readBytesReference(length));
var byteBuffs = BytesReference.toByteBuffers(readBytesReference(length));
assert byteBuffs.length == 1;
return new Text(byteBuffs[0]);
}

public Text readText() throws IOException {
// use StringAndBytes so we can cache the string if it's ever converted to it
// use Text so we can cache the string if it's ever converted to it
int length = readInt();
return new Text(readBytesReference(length));
byte[] bytes = new byte[length];
if (length > 0) {
readBytes(bytes, 0, length);
}
var byteBuff = ByteBuffer.wrap(bytes);
return new Text(byteBuff);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.io.stream.Writeable.Writer;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
Expand Down Expand Up @@ -419,7 +419,7 @@ public void writeText(Text text) throws IOException {
writeInt(spare.length());
write(spare.bytes(), 0, spare.length());
} else {
BytesReference bytes = text.bytes();
BytesReference bytes = BytesReference.fromByteBuffer(text.bytes());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could support reading/writing ByteBuffer directly in StreamInput/Output so that conversions are not necessary here. That could allow optimizing in the ByteArrayStreamInput case to create a ByteBuffer that wraps the underlying byte array with appropriate offset/length, without needing to copy bytes to a new array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll look into that, thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'm thinking this might be a bigger change than we want to incorporate into this PR. It might be better to leave it as a follow-up, since the current implementation will still work, it'll just be less optimized.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK with leaving this in a follow-up, it will help with reviews too

writeInt(bytes.length());
bytes.writeTo(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.xcontent.ChunkedToXContent;
import org.elasticsearch.common.xcontent.XContentHelper;
Expand All @@ -39,6 +38,7 @@
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.transport.LeakTracker;
import org.elasticsearch.transport.RemoteClusterAware;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RemoteClusterAware;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Objects;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -34,6 +33,7 @@
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
Expand All @@ -33,6 +32,7 @@
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.util.BytesRefHash;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.rest.action.search.RestSearchAction;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -31,6 +30,7 @@
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.Suggest.Suggestion;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Objects;
Expand Down
Loading