Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class MetadataIndexNode {

private static final double LOG2 = Math.log(2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this magic number used for?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is used to estimate the search complexity; based on that estimate, the code selects which search method to use (merge scan vs. repeated binary search).

protected static final TSFileConfig config = TSFileDescriptor.getInstance().getConfig();
protected final List<IMetadataIndexEntry> children;
protected long endOffset;
Expand Down Expand Up @@ -127,6 +129,10 @@ public static MetadataIndexNode deserializeFrom(

public Pair<IMetadataIndexEntry, Long> getChildIndexEntry(Comparable key, boolean exactSearch) {
  // Locate the child covering {@code key}, then resolve that index to an entry/end-offset pair.
  return getChildIndexEntry(binarySearchInChildren(key, exactSearch));
}

private Pair<IMetadataIndexEntry, Long> getChildIndexEntry(int index) {
if (index == -1) {
return null;
}
Expand Down Expand Up @@ -165,6 +171,105 @@ int binarySearchInChildren(Comparable key, boolean exactSearch) {
}
}

/**
 * Resolves each of the given sorted keys to the child entry that covers it.
 *
 * <p>Cost model: a merge scan is O(k + c) while repeated binary search is O(k * log2(c));
 * the cheaper of the two is chosen (merge scan is also forced when every child may be visited).
 *
 * <p>Note: consecutive keys mapping to the same child receive the SAME Pair instance — callers
 * rely on reference equality to group runs of keys cheaply.
 *
 * @param keys sorted search keys
 * @param exactSearch if true, keys without an exact match map to null entries
 * @return one entry per key, in key order; an element is null when the key has no covering child
 */
public List<Pair<IMetadataIndexEntry, Long>> getChildIndexEntries(
    List<? extends Comparable> keys, boolean exactSearch) {
  boolean useMergeScan =
      keys.size() >= children.size()
          || (keys.size() * Math.log(children.size()) / LOG2) > (keys.size() + children.size());
  int[] childIndexes =
      useMergeScan
          ? mergeSearchInChildren(keys, exactSearch)
          : binarySearchInChildren(keys, exactSearch);

  List<Pair<IMetadataIndexEntry, Long>> result = new ArrayList<>(childIndexes.length);
  int cachedIndex = -1;
  Pair<IMetadataIndexEntry, Long> cachedPair = null;
  for (int childIndex : childIndexes) {
    if (childIndex != cachedIndex) {
      // New index: materialize the pair once and reuse it for the whole run.
      cachedPair = getChildIndexEntry(childIndex);
      cachedIndex = childIndex;
    }
    result.add(cachedPair);
  }
  return result;
}

/**
 * Searches the index of each sorted key in {@code children} with repeated binary searches,
 * shrinking the lower bound of the search window after each key (both lists are sorted).
 *
 * @param keys sorted search keys
 * @param exactSearch if true, a key without an exact match yields -1; otherwise it yields the
 *     index of the last child whose compare key is <= the search key (-1 if none)
 * @return one child index per key, in key order
 */
int[] binarySearchInChildren(List<? extends Comparable> keys, boolean exactSearch) {
  int[] results = new int[keys.size()];
  Arrays.fill(results, -1);
  int currentLow = 0;
  int high = children.size() - 1;

  for (int i = 0; i < keys.size(); i++) {
    Comparable key = keys.get(i);
    if (currentLow > high) {
      // All remaining keys are larger than every child key: map them to the last child,
      // or to "not found" under exact search.
      Arrays.fill(results, i, keys.size(), exactSearch ? -1 : currentLow - 1);
      return results;
    }

    int foundIndex = -1;
    int start = currentLow;
    int end = high;

    while (start <= end) {
      int mid = (start + end) >>> 1; // overflow-safe midpoint
      IMetadataIndexEntry midVal = children.get(mid);
      int cmp = midVal.getCompareKey().compareTo(key);

      if (cmp < 0) {
        start = mid + 1;
      } else if (cmp > 0) {
        end = mid - 1;
      } else {
        foundIndex = mid;
        break;
      }
    }

    if (foundIndex >= 0) {
      results[i] = foundIndex;
      // Keep foundIndex inside the next window: if keys contains the same key twice, the
      // second occurrence must still match. (The previous "foundIndex + 1" made a duplicated
      // key report -1 under exact search; for strictly increasing keys behavior is unchanged.)
      currentLow = foundIndex;
    } else {
      if (exactSearch) {
        results[i] = -1;
      } else {
        // start is the insertion point; the covering child is the one just before it.
        int insertPos = start - 1;
        results[i] = insertPos;
        currentLow = start;
      }
    }
  }
  return results;
}

/**
 * Merge-joins the sorted keys against the sorted children in a single linear pass (O(k + c)).
 *
 * @param keys sorted search keys
 * @param exactSearch if true, a key without an exact match yields -1; otherwise it yields the
 *     index of the last child whose compare key is <= the search key (-1 if none)
 * @return one child index per key, in key order
 */
int[] mergeSearchInChildren(List<? extends Comparable> keys, boolean exactSearch) {
  int[] results = new int[keys.size()];
  int i = 0;
  int j = 0;
  while (i < keys.size() && j < children.size()) {
    Comparable currentKey = keys.get(i);
    Comparable currentChild = children.get(j).getCompareKey();
    int cmp = currentKey.compareTo(currentChild);
    if (cmp == 0) {
      results[i] = j;
      i++;
      // Deliberately do NOT advance j: a duplicated key in {@code keys} must match the same
      // child again. (Previously both cursors advanced, so the second occurrence of a
      // duplicate key missed under exact search; for distinct keys behavior is unchanged.)
    } else if (cmp > 0) {
      // Key is beyond this child; move to the next child.
      j++;
    } else {
      // Key falls before child j: it belongs to the previous child (or nowhere, if exact).
      if (exactSearch) {
        results[i] = -1;
      } else {
        results[i] = j - 1;
      }
      i++;
    }
  }
  // Remaining keys are larger than every child key.
  Arrays.fill(results, i, keys.size(), exactSearch ? -1 : children.size() - 1);
  return results;
}

public boolean isDeviceLevel() {
return this.nodeType == MetadataIndexNodeType.INTERNAL_DEVICE
|| this.nodeType == MetadataIndexNodeType.LEAF_DEVICE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,82 @@ private Map<String, TimeseriesMetadata> readDeviceMetadataFromDisk(IDeviceID dev
return deviceMetadata;
}

/**
* Find the offset of MetadataIndexNode corresponding to every device to avoid repeated reading of
* internal MetadataIndexNode
*
* @param table table name, or "" for tree model
* @param sortedDevices devices should be sorted
* @param ioSizeRecorder can be null
* @return Each element of the outer array corresponds to the device at this index. The inner
* array size is 2, the first element is the start offset, and the second is the end offset
* @throws IOException io error
*/
public long[][] getDeviceMetadataIndexNodeOffsets(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using long[] may be better for memory consumption?

String table, List<IDeviceID> sortedDevices, LongConsumer ioSizeRecorder) throws IOException {
readFileMetadata();
MetadataIndexNode tableMetadataIndexNode = getTableRootNode(table);
if (tableMetadataIndexNode == null) {
throw new IllegalArgumentException(
"table {" + table + "} is not in tsFileMetaData of " + file);
}
long[][] results = new long[sortedDevices.size()][];
if (sortedDevices.isEmpty()) {
return results;
}
getDeviceMetadataIndexNodeOffsets(
results, sortedDevices, 0, sortedDevices.size(), tableMetadataIndexNode, ioSizeRecorder);
return results;
}

/**
 * Recursively resolves, for each device in devices[deviceStartIdx, deviceEndIdx), the offset
 * range of the device-level MetadataIndexNode that contains it, writing results[i] = {start
 * offset, end offset} (or null when the device is absent).
 *
 * <p>Consecutive devices that fall into the same child entry are grouped and the child node is
 * read and descended into exactly once per group.
 *
 * @param results output array indexed like {@code devices}; filled at the leaf level
 * @param devices all queried devices, sorted
 * @param deviceStartIdx inclusive start of the slice handled by this call
 * @param deviceEndIdx exclusive end of the slice handled by this call
 * @param startNode index node whose children cover the slice
 * @param ioSizeRecorder can be null
 * @throws IOException on read error
 */
private void getDeviceMetadataIndexNodeOffsets(
    long[][] results,
    List<IDeviceID> devices,
    int deviceStartIdx,
    int deviceEndIdx,
    MetadataIndexNode startNode,
    LongConsumer ioSizeRecorder)
    throws IOException {
  MetadataIndexNodeType metadataIndexNodeType = startNode.getNodeType();
  // At a LEAF_DEVICE node the entries name devices directly, so only exact matches count.
  boolean exactSearch = metadataIndexNodeType == MetadataIndexNodeType.LEAF_DEVICE;
  List<Pair<IMetadataIndexEntry, Long>> entries =
      startNode.getChildIndexEntries(devices.subList(deviceStartIdx, deviceEndIdx), exactSearch);
  Iterator<Pair<IMetadataIndexEntry, Long>> metadataIndexEntriesIterator = entries.iterator();
  int startIdxOfChild = deviceStartIdx;
  Pair<IMetadataIndexEntry, Long> previousPair = null;
  for (int i = deviceStartIdx; i < deviceEndIdx; i++) {
    Pair<IMetadataIndexEntry, Long> pair = metadataIndexEntriesIterator.next();
    if (exactSearch) {
      // Leaf level: record the [entry offset, end offset] range; null means device not found.
      results[i] = pair == null ? null : new long[] {pair.getLeft().getOffset(), pair.getRight()};
      continue;
    }
    if (previousPair == null) {
      // First device of the slice starts the first group.
      previousPair = pair;
      continue;
    }
    // Reference comparison is intentional: getChildIndexEntries returns the SAME Pair instance
    // for consecutive devices mapped to the same child, so == groups runs cheaply.
    if (previousPair == pair) {
      continue;
    }
    // Devices [startIdxOfChild, i) share previousPair's child: read that child node once and
    // recurse into it for the whole run.
    IMetadataIndexEntry entry = previousPair.getLeft();
    ByteBuffer buffer = readData(entry.getOffset(), previousPair.getRight(), ioSizeRecorder);
    MetadataIndexNode lastNode =
        MetadataIndexNode.deserializeFrom(buffer, true, deserializeConfig);
    getDeviceMetadataIndexNodeOffsets(
        results, devices, startIdxOfChild, i, lastNode, ioSizeRecorder);
    previousPair = pair;
    startIdxOfChild = i;
  }
  if (exactSearch || previousPair == null) {
    return;
  }
  // for last entry: flush the final group [startIdxOfChild, deviceEndIdx).
  IMetadataIndexEntry entry = previousPair.getLeft();
  ByteBuffer buffer = readData(entry.getOffset(), previousPair.getRight(), ioSizeRecorder);
  MetadataIndexNode lastNode = MetadataIndexNode.deserializeFrom(buffer, true, deserializeConfig);
  getDeviceMetadataIndexNodeOffsets(
      results, devices, startIdxOfChild, deviceEndIdx, lastNode, ioSizeRecorder);
}

public TimeseriesMetadata readTimeseriesMetadata(
IDeviceID device, String measurement, boolean ignoreNotExists) throws IOException {
return readTimeseriesMetadata(device, measurement, ignoreNotExists, null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@
import org.apache.tsfile.file.header.ChunkGroupHeader;
import org.apache.tsfile.file.header.ChunkHeader;
import org.apache.tsfile.file.header.PageHeader;
import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata;
import org.apache.tsfile.file.metadata.ChunkMetadata;
import org.apache.tsfile.file.metadata.IChunkMetadata;
import org.apache.tsfile.file.metadata.IDeviceID;
import org.apache.tsfile.file.metadata.IDeviceID.Factory;
import org.apache.tsfile.file.metadata.MetadataIndexNode;
import org.apache.tsfile.file.metadata.StringArrayDeviceID;
import org.apache.tsfile.file.metadata.TableSchema;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
import org.apache.tsfile.read.common.Path;
Expand Down Expand Up @@ -60,6 +63,7 @@
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -263,4 +267,68 @@ public void testGetTableSchemaMap() throws IOException, WriteProcessException {
Assert.assertTrue(reader.readFileMetadata().hasTableSchemaMapCache());
}
}

@Test
public void testGetDeviceMetadataIndexNodeOffsets() throws IOException, WriteProcessException {
  final int writtenDeviceCount = 10000;
  final int queriedDeviceCount = 20000;
  File file = new File(FILE_PATH);
  // Best-effort removal of the shared fixture so this test writes a fresh file.
  try {
    tsFile.close();
    Files.deleteIfExists(file.toPath());
  } catch (IOException ignored) {
  }

  // One table, one tag column ("device") and one field ("s1"); each row is its own device.
  TableSchema tableSchema =
      new TableSchema(
          "t1",
          Arrays.asList(
              new MeasurementSchema("device", TSDataType.STRING),
              new MeasurementSchema("s1", TSDataType.INT32)),
          Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD));
  try (ITsFileWriter writer =
      new TsFileWriterBuilder().tableSchema(tableSchema).file(file).build()) {
    Tablet tablet =
        new Tablet(
            Arrays.asList("device", "s1"),
            Arrays.asList(TSDataType.STRING, TSDataType.INT32),
            writtenDeviceCount);
    for (int row = 0; row < writtenDeviceCount; row++) {
      tablet.addTimestamp(row, row);
      tablet.addValue("device", row, "d" + row);
      tablet.addValue("s1", row, row);
    }
    writer.write(tablet);
  }

  // Query twice as many devices as were written; the upper half must come back absent.
  List<IDeviceID> queriedDevices = new ArrayList<>();
  for (int n = 0; n < queriedDeviceCount; n++) {
    queriedDevices.add(new StringArrayDeviceID("t1.d" + n));
  }
  queriedDevices.sort(IDeviceID::compareTo);

  try (TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH)) {
    long[][] offsets = reader.getDeviceMetadataIndexNodeOffsets("t1", queriedDevices, null);
    Assert.assertEquals(queriedDeviceCount, offsets.length);
    for (int i = 0; i < offsets.length; i++) {
      IDeviceID deviceID = queriedDevices.get(i);
      int deviceNumber = Integer.parseInt(deviceID.toString().substring("t1.d".length()));
      if (deviceNumber >= writtenDeviceCount) {
        // Device was never written: no offset range should be reported.
        Assert.assertNull(offsets[i]);
        continue;
      }
      // The returned range must point at a node from which the device's chunk metadata is
      // reachable, and the single chunk's start time equals the device number by construction.
      MetadataIndexNode metadataIndexNode =
          reader.readMetadataIndexNode(offsets[i][0], offsets[i][1], false);
      List<AbstractAlignedChunkMetadata> alignedChunkMetadataList =
          reader.getAlignedChunkMetadataByMetadataIndexNode(deviceID, metadataIndexNode, true);
      Assert.assertEquals(1, alignedChunkMetadataList.size());
      Assert.assertEquals(deviceNumber, alignedChunkMetadataList.get(0).getStartTime());
    }

    // Edge cases: an empty query list and a device name that cannot exist.
    Assert.assertEquals(
        0, reader.getDeviceMetadataIndexNodeOffsets("t1", Collections.emptyList(), null).length);
    offsets =
        reader.getDeviceMetadataIndexNodeOffsets(
            "t1", Collections.singletonList(new StringArrayDeviceID("t1.d")), null);
    Assert.assertEquals(1, offsets.length);
    Assert.assertNull(offsets[0]);
  }
}
}
Loading