Skip to content

Commit 5c598d1

Browse files
committed
impl ContentFileEstimater
1 parent d5e5551 commit 5c598d1

File tree

2 files changed

+195
-15
lines changed

2 files changed

+195
-15
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.datasource.iceberg.cache;
19+
20+
import org.apache.iceberg.ContentFile;
21+
import org.apache.iceberg.DeleteFile;
22+
import org.apache.iceberg.StructLike;
23+
24+
import java.nio.ByteBuffer;
25+
import java.util.List;
26+
import java.util.Map;
27+
28+
/**
29+
* Utility to estimate the JVM weight of Iceberg {@link ContentFile} objects.
30+
*/
31+
public final class ContentFileEstimater {
32+
private static final long LIST_BASE_WEIGHT = 48L;
33+
private static final long OBJECT_REFERENCE_WEIGHT = 8L;
34+
private static final long CONTENT_FILE_BASE_WEIGHT = 256L;
35+
private static final long STRING_BASE_WEIGHT = 40L;
36+
private static final long CHAR_BYTES = 2L;
37+
private static final long BYTE_BUFFER_BASE_WEIGHT = 16L;
38+
private static final long MAP_BASE_WEIGHT = 48L;
39+
private static final long MAP_ENTRY_OVERHEAD = 24L;
40+
private static final long LONG_OBJECT_WEIGHT = 24L;
41+
private static final long INT_OBJECT_WEIGHT = 16L;
42+
private static final long PARTITION_BASE_WEIGHT = 48L;
43+
private static final long PARTITION_VALUE_BASE_WEIGHT = 8L;
44+
45+
private ContentFileEstimater() {
46+
}
47+
48+
public static long estimate(List<? extends ContentFile<?>> files) {
49+
return listReferenceWeight(files) + estimateContentFilesWeight(files);
50+
}
51+
52+
private static long listReferenceWeight(List<?> files) {
53+
if (files == null || files.isEmpty()) {
54+
return 0L;
55+
}
56+
return LIST_BASE_WEIGHT + (long) files.size() * OBJECT_REFERENCE_WEIGHT;
57+
}
58+
59+
private static long estimateContentFilesWeight(List<? extends ContentFile<?>> files) {
60+
long total = 0L;
61+
if (files == null) {
62+
return 0L;
63+
}
64+
for (ContentFile<?> file : files) {
65+
total += estimateContentFileWeight(file);
66+
}
67+
return total;
68+
}
69+
70+
private static long estimateContentFileWeight(ContentFile<?> file) {
71+
if (file == null) {
72+
return 0L;
73+
}
74+
75+
long weight = CONTENT_FILE_BASE_WEIGHT;
76+
weight += charSequenceWeight(file.path());
77+
weight += stringWeight(file.manifestLocation());
78+
weight += byteBufferWeight(file.keyMetadata());
79+
weight += partitionWeight(file.partition());
80+
81+
weight += numericMapWeight(file.columnSizes());
82+
weight += numericMapWeight(file.valueCounts());
83+
weight += numericMapWeight(file.nullValueCounts());
84+
weight += numericMapWeight(file.nanValueCounts());
85+
weight += byteBufferMapWeight(file.lowerBounds());
86+
weight += byteBufferMapWeight(file.upperBounds());
87+
88+
weight += listWeight(file.splitOffsets(), LONG_OBJECT_WEIGHT);
89+
weight += listWeight(file.equalityFieldIds(), INT_OBJECT_WEIGHT);
90+
91+
weight += optionalLongWeight(file.pos());
92+
weight += optionalLongWeight(file.dataSequenceNumber());
93+
weight += optionalLongWeight(file.fileSequenceNumber());
94+
weight += optionalLongWeight(file.firstRowId());
95+
weight += optionalIntWeight(file.sortOrderId());
96+
97+
if (file instanceof DeleteFile) {
98+
DeleteFile deleteFile = (DeleteFile) file;
99+
weight += stringWeight(deleteFile.referencedDataFile());
100+
weight += optionalLongWeight(deleteFile.contentOffset());
101+
weight += optionalLongWeight(deleteFile.contentSizeInBytes());
102+
}
103+
104+
return weight;
105+
}
106+
107+
private static long listWeight(List<? extends Number> list, long elementWeight) {
108+
if (list == null || list.isEmpty()) {
109+
return 0L;
110+
}
111+
return LIST_BASE_WEIGHT + (long) list.size() * (OBJECT_REFERENCE_WEIGHT + elementWeight);
112+
}
113+
114+
private static long numericMapWeight(Map<Integer, Long> map) {
115+
if (map == null || map.isEmpty()) {
116+
return 0L;
117+
}
118+
return MAP_BASE_WEIGHT + (long) map.size() * (MAP_ENTRY_OVERHEAD + LONG_OBJECT_WEIGHT);
119+
}
120+
121+
private static long byteBufferMapWeight(Map<Integer, ByteBuffer> map) {
122+
if (map == null || map.isEmpty()) {
123+
return 0L;
124+
}
125+
long weight = MAP_BASE_WEIGHT + (long) map.size() * MAP_ENTRY_OVERHEAD;
126+
for (ByteBuffer buffer : map.values()) {
127+
weight += byteBufferWeight(buffer);
128+
}
129+
return weight;
130+
}
131+
132+
private static long partitionWeight(StructLike partition) {
133+
if (partition == null) {
134+
return 0L;
135+
}
136+
long weight = PARTITION_BASE_WEIGHT + (long) partition.size() * PARTITION_VALUE_BASE_WEIGHT;
137+
for (int i = 0; i < partition.size(); i++) {
138+
Object value = partition.get(i, Object.class);
139+
weight += estimateValueWeight(value);
140+
}
141+
return weight;
142+
}
143+
144+
private static long estimateValueWeight(Object value) {
145+
if (value == null) {
146+
return 0L;
147+
}
148+
if (value instanceof CharSequence) {
149+
return charSequenceWeight((CharSequence) value);
150+
} else if (value instanceof byte[]) {
151+
return BYTE_BUFFER_BASE_WEIGHT + ((byte[]) value).length;
152+
} else if (value instanceof ByteBuffer) {
153+
return byteBufferWeight((ByteBuffer) value);
154+
} else if (value instanceof Long || value instanceof Double) {
155+
return LONG_OBJECT_WEIGHT;
156+
} else if (value instanceof Integer || value instanceof Float) {
157+
return INT_OBJECT_WEIGHT;
158+
} else if (value instanceof Short || value instanceof Character) {
159+
return 4L;
160+
} else if (value instanceof Boolean) {
161+
return 1L;
162+
}
163+
return OBJECT_REFERENCE_WEIGHT;
164+
}
165+
166+
private static long charSequenceWeight(CharSequence value) {
167+
if (value == null) {
168+
return 0L;
169+
}
170+
return STRING_BASE_WEIGHT + (long) value.length() * CHAR_BYTES;
171+
}
172+
173+
private static long stringWeight(String value) {
174+
if (value == null) {
175+
return 0L;
176+
}
177+
return STRING_BASE_WEIGHT + (long) value.length() * CHAR_BYTES;
178+
}
179+
180+
private static long byteBufferWeight(ByteBuffer buffer) {
181+
if (buffer == null) {
182+
return 0L;
183+
}
184+
return BYTE_BUFFER_BASE_WEIGHT + buffer.remaining();
185+
}
186+
187+
private static long optionalLongWeight(Long value) {
188+
return value == null ? 0L : LONG_OBJECT_WEIGHT;
189+
}
190+
191+
private static long optionalIntWeight(Integer value) {
192+
return value == null ? 0L : INT_OBJECT_WEIGHT;
193+
}
194+
}

fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/cache/ManifestCacheValue.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,20 +60,6 @@ public long getWeightBytes() {
6060
}
6161

6262
// Diff hunk for estimateWeight: lines tagged "N-" are removed by this commit, the line tagged
// "N+" is added. After the commit the method returns the sum of
// ContentFileEstimater.estimate(dataFiles) and ContentFileEstimater.estimate(deleteFiles).
private static long estimateWeight(List<DataFile> dataFiles, List<DeleteFile> deleteFiles) {
63-
// A coarse weight estimation based on path lengths and fixed object overhead.
64-
long total = 0;
65-
for (DataFile file : dataFiles) {
66-
total += 128L; // base object overhead
67-
if (file != null && file.path() != null) {
68-
total += file.path().toString().length();
69-
}
70-
}
71-
for (DeleteFile file : deleteFiles) {
72-
total += 128L;
73-
if (file != null && file.path() != null) {
74-
total += file.path().toString().length();
75-
}
76-
}
77-
return total;
63+
// Replacement body: delegate the per-list estimation to ContentFileEstimater and add the results.
return ContentFileEstimater.estimate(dataFiles) + ContentFileEstimater.estimate(deleteFiles);
7864
}
7965
}

0 commit comments

Comments
 (0)