Skip to content

Commit a9f3fb5

Browse files
committed
Don't merge statistics when two types are not compatible;
Fix regenerate statistics logics.
1 parent 524cc4c commit a9f3fb5

File tree

3 files changed

+212
-24
lines changed

3 files changed

+212
-24
lines changed

iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/utils/ResourceByPathUtils.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ public AbstractAlignedTimeSeriesMetadata generateTimeSeriesMetadata(
280280
if (!useFakeStatistics) {
281281
timeStatistics.mergeStatistics(alignedChunkMetadata.getTimeChunkMetadata().getStatistics());
282282
for (int i = 0; i < valueTimeSeriesMetadataList.size(); i++) {
283-
if (alignedChunkMetadata.getValueChunkMetadataList().get(i) != null) {
283+
if (!alignedChunkMetadata.getValueChunkMetadataList().isEmpty()
284+
&& alignedChunkMetadata.getValueChunkMetadataList().get(i) != null) {
284285
exist[i] = true;
285286
valueTimeSeriesMetadataList
286287
.get(i)
@@ -551,7 +552,9 @@ public ITimeSeriesMetadata generateTimeSeriesMetadata(
551552
chunkMetadata.setModified(true);
552553
}
553554
if (!useFakeStatistics) {
554-
seriesStatistics.mergeStatistics(chunkMetadata.getStatistics());
555+
if (targetDataType.isCompatible(chunkMetadata.getDataType())) {
556+
seriesStatistics.mergeStatistics(chunkMetadata.getStatistics());
557+
}
555558
continue;
556559
}
557560
startTime = Math.min(startTime, chunkMetadata.getStartTime());

iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -403,25 +403,29 @@ public static AbstractAlignedChunkMetadata rewriteAlignedChunkMetadataStatistics
403403
AbstractAlignedChunkMetadata alignedChunkMetadata, TSDataType targetDataType) {
404404
List<IChunkMetadata> newValueChunkMetadataList = new ArrayList<>();
405405
for (IChunkMetadata valueChunkMetadata : alignedChunkMetadata.getValueChunkMetadataList()) {
406-
Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
407-
statistics = getNewStatistics(valueChunkMetadata, targetDataType, statistics);
408-
409-
ChunkMetadata newChunkMetadata = (ChunkMetadata) valueChunkMetadata;
410-
newChunkMetadata.setTsDataType(targetDataType);
411-
newChunkMetadata.setStatistics(statistics);
412-
newValueChunkMetadataList.add(newChunkMetadata);
406+
if (targetDataType.isCompatible(valueChunkMetadata.getDataType())) {
407+
Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
408+
statistics = getNewStatistics(valueChunkMetadata, targetDataType, statistics);
409+
410+
ChunkMetadata newChunkMetadata = (ChunkMetadata) valueChunkMetadata;
411+
newChunkMetadata.setTsDataType(targetDataType);
412+
newChunkMetadata.setStatistics(statistics);
413+
newValueChunkMetadataList.add(newChunkMetadata);
414+
}
413415
}
414416
return new AlignedChunkMetadata(
415417
alignedChunkMetadata.getTimeChunkMetadata(), newValueChunkMetadataList);
416418
}
417419

418420
public static void rewriteNonAlignedChunkMetadataStatistics(
419421
ChunkMetadata chunkMetadata, TSDataType targetDataType) {
420-
Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
421-
statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
422+
if (targetDataType.isCompatible(chunkMetadata.getDataType())) {
423+
Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
424+
statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
422425

423-
chunkMetadata.setTsDataType(targetDataType);
424-
chunkMetadata.setStatistics(statistics);
426+
chunkMetadata.setTsDataType(targetDataType);
427+
chunkMetadata.setStatistics(statistics);
428+
}
425429
}
426430

427431
public static TSEncoding getDataTypeCompatibleEncoding(TSDataType dataType, TSEncoding encoding) {
@@ -492,28 +496,28 @@ public static Statistics<?> getNewStatistics(
492496
Binary[] binaryValues = new Binary[2];
493497
binaryValues[0] =
494498
new Binary(
495-
Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
496-
.contains(chunkMetadata.getDataType())
497-
? ""
498-
: chunkMetadata.getStatistics().getMinValue().toString(),
499-
StandardCharsets.UTF_8);
499+
chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8);
500500
binaryValues[1] =
501501
new Binary(
502-
Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
503-
.contains(chunkMetadata.getDataType())
504-
? ""
505-
: chunkMetadata.getStatistics().getMaxValue().toString(),
506-
StandardCharsets.UTF_8);
502+
chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8);
507503
long[] longValues = new long[2];
508504
longValues[0] = chunkMetadata.getStatistics().getStartTime();
509505
longValues[1] = chunkMetadata.getStatistics().getEndTime();
510506
statistics.update(longValues, binaryValues, binaryValues.length);
507+
} else if (targetDataType == TSDataType.BLOB) {
508+
statistics.update(
509+
chunkMetadata.getStatistics().getStartTime(),
510+
new Binary(
511+
chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8));
512+
statistics.update(
513+
chunkMetadata.getStatistics().getEndTime(),
514+
new Binary(
515+
chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8));
511516
} else {
512517
statistics = chunkMetadata.getStatistics();
513518
}
514519
break;
515520
case TEXT:
516-
case BLOB:
517521
if (targetDataType == TSDataType.STRING) {
518522
Binary[] binaryValues = new Binary[2];
519523
binaryValues[0] = new Binary("", StandardCharsets.UTF_8);
@@ -522,6 +526,24 @@ public static Statistics<?> getNewStatistics(
522526
longValues[0] = chunkMetadata.getStatistics().getStartTime();
523527
longValues[1] = chunkMetadata.getStatistics().getEndTime();
524528
statistics.update(longValues, binaryValues, binaryValues.length);
529+
} else if (targetDataType == TSDataType.BLOB) {
530+
statistics.update(
531+
chunkMetadata.getStatistics().getStartTime(), new Binary("", StandardCharsets.UTF_8));
532+
statistics.update(
533+
chunkMetadata.getStatistics().getEndTime(), new Binary("", StandardCharsets.UTF_8));
534+
} else {
535+
statistics = chunkMetadata.getStatistics();
536+
}
537+
break;
538+
case BLOB:
539+
if (targetDataType == TSDataType.STRING || targetDataType == TSDataType.TEXT) {
540+
Binary[] binaryValues = new Binary[2];
541+
binaryValues[0] = new Binary("", StandardCharsets.UTF_8);
542+
binaryValues[1] = new Binary("", StandardCharsets.UTF_8);
543+
long[] longValues = new long[2];
544+
longValues[0] = chunkMetadata.getStatistics().getStartTime();
545+
longValues[1] = chunkMetadata.getStatistics().getEndTime();
546+
statistics.update(longValues, binaryValues, binaryValues.length);
525547
} else {
526548
statistics = chunkMetadata.getStatistics();
527549
}

iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/SchemaUtilsTest.java

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,20 @@
2929
import org.apache.tsfile.file.metadata.enums.CompressionType;
3030
import org.apache.tsfile.file.metadata.enums.TSEncoding;
3131
import org.apache.tsfile.file.metadata.statistics.Statistics;
32+
import org.apache.tsfile.utils.Binary;
3233
import org.junit.Assert;
3334
import org.junit.Test;
3435

36+
import java.nio.charset.StandardCharsets;
37+
import java.time.LocalDate;
3538
import java.util.ArrayList;
3639
import java.util.Arrays;
3740
import java.util.Collections;
41+
import java.util.HashSet;
3842
import java.util.List;
43+
import java.util.Set;
44+
45+
import static org.apache.tsfile.file.metadata.statistics.Statistics.canMerge;
3946

4047
public class SchemaUtilsTest {
4148
@Test
@@ -112,4 +119,160 @@ public void rewriteAlignedChunkMetadataStatistics() {
112119
}
113120
}
114121
}
122+
123+
public static Object genValue(TSDataType dataType, int i) {
124+
switch (dataType) {
125+
case INT32:
126+
return i;
127+
case DATE:
128+
return LocalDate.ofEpochDay(i);
129+
case TIMESTAMP:
130+
case INT64:
131+
return (long) i;
132+
case BOOLEAN:
133+
return i % 2 == 0;
134+
case FLOAT:
135+
return i * 1.0f;
136+
case DOUBLE:
137+
return i * 1.0;
138+
case STRING:
139+
case TEXT:
140+
case BLOB:
141+
return new Binary(Integer.toString(i), StandardCharsets.UTF_8);
142+
case UNKNOWN:
143+
case VECTOR:
144+
default:
145+
throw new IllegalArgumentException("Unsupported data type: " + dataType);
146+
}
147+
}
148+
149+
@Test
150+
public void mergeMetadataStatistics() throws Exception {
151+
Set<TSDataType> unsupportTsDataType = new HashSet<>();
152+
unsupportTsDataType.add(TSDataType.UNKNOWN);
153+
unsupportTsDataType.add(TSDataType.VECTOR);
154+
for (TSDataType sourceDataType : Arrays.asList(TSDataType.DOUBLE)) {
155+
for (TSDataType targetDataType : Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)) {
156+
157+
if (sourceDataType.equals(targetDataType)) {
158+
continue;
159+
}
160+
if (unsupportTsDataType.contains(sourceDataType)
161+
|| unsupportTsDataType.contains(targetDataType)) {
162+
continue;
163+
}
164+
165+
System.out.println("from " + sourceDataType + " to " + targetDataType);
166+
167+
// Aligned series
168+
Statistics<?> s1 = Statistics.getStatsByType(sourceDataType);
169+
s1.update(new long[] {1, 2}, new double[] {1.0, 2.0}, 2);
170+
Statistics<?> s2 = Statistics.getStatsByType(TSDataType.DOUBLE);
171+
s2.update(new long[] {1, 2}, new double[] {1.0, 2.0}, 2);
172+
List<IChunkMetadata> valueChunkMetadatas =
173+
Arrays.asList(
174+
new ChunkMetadata(
175+
"s0",
176+
sourceDataType,
177+
SchemaUtils.getDataTypeCompatibleEncoding(sourceDataType, TSEncoding.RLE),
178+
CompressionType.LZ4,
179+
0,
180+
s1),
181+
new ChunkMetadata(
182+
"s1",
183+
TSDataType.DOUBLE,
184+
SchemaUtils.getDataTypeCompatibleEncoding(TSDataType.DOUBLE, TSEncoding.RLE),
185+
CompressionType.LZ4,
186+
0,
187+
s2));
188+
IChunkMetadata alignedChunkMetadata =
189+
new AlignedChunkMetadata(new ChunkMetadata(), valueChunkMetadatas);
190+
191+
Statistics<?> s3 = Statistics.getStatsByType(targetDataType);
192+
if (targetDataType == TSDataType.BLOB) {
193+
s3.update(3, new Binary("3", StandardCharsets.UTF_8));
194+
s3.update(4, new Binary("4", StandardCharsets.UTF_8));
195+
} else {
196+
s3.update(
197+
new long[] {1, 2},
198+
new Binary[] {
199+
new Binary("3", StandardCharsets.UTF_8),
200+
new Binary("4", StandardCharsets.UTF_8),
201+
new Binary("3", StandardCharsets.UTF_8),
202+
new Binary("4", StandardCharsets.UTF_8)
203+
},
204+
2);
205+
}
206+
Statistics<?> s4 = Statistics.getStatsByType(targetDataType);
207+
if (targetDataType == TSDataType.BLOB) {
208+
s3.update(4, new Binary("4", StandardCharsets.UTF_8));
209+
} else {
210+
s4.update(
211+
new long[] {1, 2},
212+
new Binary[] {
213+
new Binary("5", StandardCharsets.UTF_8),
214+
new Binary("6", StandardCharsets.UTF_8),
215+
new Binary("5", StandardCharsets.UTF_8),
216+
new Binary("6", StandardCharsets.UTF_8)
217+
},
218+
2);
219+
}
220+
List<IChunkMetadata> targetChunkMetadatas =
221+
Arrays.asList(
222+
new ChunkMetadata(
223+
"s0",
224+
targetDataType,
225+
SchemaUtils.getDataTypeCompatibleEncoding(targetDataType, TSEncoding.RLE),
226+
CompressionType.LZ4,
227+
0,
228+
s3),
229+
new ChunkMetadata(
230+
"s1",
231+
targetDataType,
232+
SchemaUtils.getDataTypeCompatibleEncoding(targetDataType, TSEncoding.RLE),
233+
CompressionType.LZ4,
234+
0,
235+
s4));
236+
AbstractAlignedChunkMetadata abstractAlignedChunkMetadata =
237+
(AbstractAlignedChunkMetadata) alignedChunkMetadata;
238+
try {
239+
abstractAlignedChunkMetadata =
240+
SchemaUtils.rewriteAlignedChunkMetadataStatistics(
241+
abstractAlignedChunkMetadata, targetDataType);
242+
} catch (ClassCastException e) {
243+
Assert.fail(e.getMessage());
244+
}
245+
246+
for (int i = 0; i < targetChunkMetadatas.size(); i++) {
247+
if (!abstractAlignedChunkMetadata.getValueChunkMetadataList().isEmpty()
248+
&& abstractAlignedChunkMetadata.getValueChunkMetadataList().get(i) != null) {
249+
if (targetChunkMetadatas.get(i).getStatistics().getClass()
250+
== abstractAlignedChunkMetadata
251+
.getValueChunkMetadataList()
252+
.get(i)
253+
.getStatistics()
254+
.getClass()
255+
|| canMerge(
256+
abstractAlignedChunkMetadata
257+
.getValueChunkMetadataList()
258+
.get(i)
259+
.getStatistics()
260+
.getType(),
261+
targetChunkMetadatas.get(i).getStatistics().getType())) {
262+
targetChunkMetadatas
263+
.get(i)
264+
.getStatistics()
265+
.mergeStatistics(
266+
abstractAlignedChunkMetadata
267+
.getValueChunkMetadataList()
268+
.get(i)
269+
.getStatistics());
270+
} else {
271+
throw new Exception("unsupported");
272+
}
273+
}
274+
}
275+
}
276+
}
277+
}
115278
}

0 commit comments

Comments
 (0)