Skip to content

Commit 9ef101f

Browse files
committed
Allow FST builder to use different writer (#12543)
1 parent 12fc7bf commit 9ef101f

File tree

10 files changed

+627
-46
lines changed

10 files changed

+627
-46
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.store;
18+
19+
import java.io.IOException;
20+
import java.nio.ByteBuffer;
21+
import java.nio.ByteOrder;
22+
import java.nio.channels.FileChannel;
23+
import org.apache.lucene.util.Accountable;
24+
import org.apache.lucene.util.RamUsageEstimator;
25+
26+
/**
27+
* A {@link DataOutput} which writes to a {@link FileChannel}
28+
*
29+
* @lucene.experimental
30+
*/
31+
public class FileChannelDataOutput extends DataOutput implements Accountable {
32+
33+
private static final long BASE_RAM_BYTES_USED =
34+
RamUsageEstimator.shallowSizeOfInstance(FileChannelDataOutput.class);
35+
36+
// buffer to write to the channel
37+
private ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
38+
39+
private final FileChannel channel;
40+
41+
/**
42+
* ctor
43+
*
44+
* @param channel the channel to write to
45+
*/
46+
public FileChannelDataOutput(FileChannel channel) {
47+
this.channel = channel;
48+
}
49+
50+
@Override
51+
public void writeByte(byte b) throws IOException {
52+
buf.clear().put(b).flip();
53+
this.channel.write(buf);
54+
}
55+
56+
@Override
57+
public void writeBytes(byte[] b, int offset, int length) throws IOException {
58+
this.channel.write(ByteBuffer.wrap(b, offset, length).order(ByteOrder.LITTLE_ENDIAN));
59+
}
60+
61+
@Override
62+
public void writeInt(int i) throws IOException {
63+
buf.clear().putInt(i).flip();
64+
this.channel.write(buf);
65+
}
66+
67+
@Override
68+
public void writeShort(short i) throws IOException {
69+
buf.clear().putShort(i).flip();
70+
this.channel.write(buf);
71+
}
72+
73+
@Override
74+
public void writeLong(long i) throws IOException {
75+
buf.clear().putLong(i).flip();
76+
this.channel.write(buf);
77+
}
78+
79+
public FileChannel getChannel() {
80+
return channel;
81+
}
82+
83+
@Override
84+
public long ramBytesUsed() {
85+
return BASE_RAM_BYTES_USED + buf.capacity();
86+
}
87+
}

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
// TODO: merge with PagedBytes, except PagedBytes doesn't
2727
// let you read while writing which FST needs
2828

29-
class BytesStore extends DataOutput implements FSTReader {
29+
class BytesStore extends DataOutput implements FSTWriter {
3030

3131
private static final long BASE_RAM_BYTES_USED =
3232
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -359,6 +359,7 @@ public void truncate(long newLen) {
359359
assert newLen == getPosition();
360360
}
361361

362+
@Override
362363
public void finish() {
363364
if (current != null) {
364365
byte[] lastBuffer = new byte[nextWrite];
@@ -368,6 +369,16 @@ public void finish() {
368369
}
369370
}
370371

372+
/** Writes all of our bytes to the target {@link FSTWriter}. */
373+
public void writeTo(FSTWriter out) throws IOException {
374+
// TODO: if the FSTWriter is also BytesStore we are doing double write
375+
// once to reverse the bytes and once to write to the BytesStore
376+
// maybe we should combine it into reverseAndWriteTo()?
377+
for (byte[] block : blocks) {
378+
out.writeBytes(block, 0, block.length);
379+
}
380+
}
381+
371382
/** Writes all of our bytes to the target {@link DataOutput}. */
372383
@Override
373384
public void writeTo(DataOutput out) throws IOException {
@@ -447,6 +458,11 @@ public FST.BytesReader getReverseBytesReader() {
447458
return getReverseReader(true);
448459
}
449460

461+
@Override
462+
public FST.BytesReader getReverseReaderForSuffixSharing() {
463+
return getReverseReader(false);
464+
}
465+
450466
FST.BytesReader getReverseReader(boolean allowSingle) {
451467
if (allowSingle && blocks.size() == 1) {
452468
return new ReverseBytesReader(blocks.get(0));
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.util.fst;
18+
19+
import java.io.Closeable;
20+
import java.io.IOException;
21+
import org.apache.lucene.store.DataOutput;
22+
import org.apache.lucene.store.RandomAccessInput;
23+
import org.apache.lucene.util.Accountable;
24+
import org.apache.lucene.util.RamUsageEstimator;
25+
26+
/**
27+
* An {@link FSTWriter} which write to a DataOutput
28+
*
29+
* @lucene.experimental
30+
*/
31+
public class DataOutputFSTWriter implements FSTWriter {
32+
33+
private static final long BASE_RAM_BYTES_USED =
34+
RamUsageEstimator.shallowSizeOfInstance(DataOutputFSTWriter.class);
35+
36+
private final DataOutput dataOutput;
37+
38+
private final RandomAccessInput dataInput;
39+
40+
protected boolean finish = false;
41+
42+
private long size = 0L;
43+
44+
/**
45+
* ctor
46+
*
47+
* @param dataOutput the data output to write to
48+
* @param dataInput the data input containing the written bytes to read from
49+
*/
50+
public DataOutputFSTWriter(DataOutput dataOutput, RandomAccessInput dataInput) {
51+
this.dataOutput = dataOutput;
52+
this.dataInput = dataInput;
53+
}
54+
55+
@Override
56+
public long ramBytesUsed() {
57+
long size = BASE_RAM_BYTES_USED;
58+
if (dataOutput instanceof Accountable) {
59+
size += ((Accountable) dataOutput).ramBytesUsed();
60+
}
61+
return size;
62+
}
63+
64+
@Override
65+
public long size() {
66+
return size;
67+
}
68+
69+
@Override
70+
public void writeByte(byte b) throws IOException {
71+
size++;
72+
dataOutput.writeByte(b);
73+
}
74+
75+
@Override
76+
public void writeBytes(byte[] b, int offset, int length) throws IOException {
77+
size += length;
78+
dataOutput.writeBytes(b, offset, length);
79+
}
80+
81+
@Override
82+
public void finish() throws IOException {
83+
finish = true;
84+
if (dataOutput instanceof Closeable) {
85+
((Closeable) dataOutput).close();
86+
}
87+
}
88+
89+
@Override
90+
public void writeTo(DataOutput out) throws IOException {
91+
// Technically we can support this method, as the DataOutput by this time has already been
92+
// closed.
93+
// But allow the FST which is already written to a DataOutput to be saved to another DataOutput
94+
// would be rather a strange use case
95+
throw new UnsupportedOperationException("writeTo(DataOutput) is not supported by this class");
96+
}
97+
98+
@Override
99+
public FST.BytesReader getReverseBytesReader() {
100+
// Technically we can support this method, as the DataOutput by this time has already been
101+
// closed.
102+
// However, I think ideally we would want the FSTWriter/FSTCompiler to only write the FST to the
103+
// DataOutput, and
104+
// some process later on can construct the FST using the FSTStore method.
105+
throw new UnsupportedOperationException(
106+
"getReverseBytesReader() is not supported by this class");
107+
}
108+
109+
@Override
110+
public FST.BytesReader getReverseReaderForSuffixSharing() {
111+
return new ReverseRandomAccessReader(dataInput);
112+
}
113+
}

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,16 @@ void setEmptyOutput(T v) {
500500
}
501501

502502
public void save(DataOutput metaOut, DataOutput out) throws IOException {
503+
saveMetadata(metaOut);
504+
fstReader.writeTo(out);
505+
}
506+
507+
/**
508+
* Saves the metadata to a DataOutput.
509+
*
510+
* @param metaOut the DataOutput to write the metadata to
511+
*/
512+
public void saveMetadata(DataOutput metaOut) throws IOException {
503513
if (startNode == -1) {
504514
throw new IllegalStateException("call finish first");
505515
}
@@ -541,7 +551,6 @@ public void save(DataOutput metaOut, DataOutput out) throws IOException {
541551
metaOut.writeByte(t);
542552
metaOut.writeVLong(startNode);
543553
metaOut.writeVLong(numBytes());
544-
fstReader.writeTo(out);
545554
}
546555

547556
/** Writes an automaton to a file. */

0 commit comments

Comments
 (0)