Skip to content

Commit 6533329

Browse files
authored
[core] Support custom data file name prefix (#4041)
1 parent 02c54f5 commit 6533329

File tree

22 files changed

+239
-36
lines changed

22 files changed

+239
-36
lines changed

docs/layouts/shortcodes/generated/core_configuration.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@
6262
<td>MemorySize</td>
6363
<td>Memory page size for caching.</td>
6464
</tr>
65+
<tr>
66+
<td><h5>changelog-file.prefix</h5></td>
67+
<td style="word-wrap: break-word;">(none)</td>
68+
<td>String</td>
69+
<td>Specify the file name prefix of changelog files.</td>
70+
</tr>
6571
<tr>
6672
<td><h5>changelog-producer</h5></td>
6773
<td style="word-wrap: break-word;">none</td>
@@ -320,6 +326,12 @@
320326
<td>Map</td>
321327
<td>Define different file format for different level, you can add the conf like this: 'file.format.per.level' = '0:avro,3:parquet', if the file format for level is not provided, the default format which set by `file.format` will be used.</td>
322328
</tr>
329+
<tr>
330+
<td><h5>data-file.prefix</h5></td>
331+
<td style="word-wrap: break-word;">(none)</td>
332+
<td>String</td>
333+
<td>Specify the file name prefix of data files.</td>
334+
</tr>
323335
<tr>
324336
<td><h5>force-lookup</h5></td>
325337
<td style="word-wrap: break-word;">false</td>

paimon-common/src/main/java/org/apache/paimon/CoreOptions.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,18 @@ public class CoreOptions implements Serializable {
171171
.withDescription(
172172
"Default file compression zstd level. For higher compression rates, it can be configured to 9, but the read and write speed will significantly decrease.");
173173

174+
public static final ConfigOption<String> DATA_FILE_PREFIX =
175+
key("data-file.prefix")
176+
.stringType()
177+
.noDefaultValue()
178+
.withDescription("Specify the file name prefix of data files.");
179+
180+
public static final ConfigOption<String> CHANGELOG_FILE_PREFIX =
181+
key("changelog-file.prefix")
182+
.stringType()
183+
.noDefaultValue()
184+
.withDescription("Specify the file name prefix of changelog files.");
185+
174186
public static final ConfigOption<MemorySize> FILE_BLOCK_SIZE =
175187
key("file.block-size")
176188
.memoryType()
@@ -1468,6 +1480,14 @@ private static String normalizeFileFormat(String fileFormat) {
14681480
return fileFormat.toLowerCase();
14691481
}
14701482

1483+
public String dataFilePrefix() {
1484+
return options.get(DATA_FILE_PREFIX);
1485+
}
1486+
1487+
public String changelogFilePrefix() {
1488+
return options.get(CHANGELOG_FILE_PREFIX);
1489+
}
1490+
14711491
public String fieldsDefaultFunc() {
14721492
return options.get(FIELDS_DEFAULT_AGG_FUNC);
14731493
}

paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,9 @@ public FileStorePathFactory pathFactory() {
102102
options.path(),
103103
partitionType,
104104
options.partitionDefaultName(),
105-
options.fileFormat().getFormatIdentifier());
105+
options.fileFormat().getFormatIdentifier(),
106+
options.dataFilePrefix(),
107+
options.changelogFilePrefix());
106108
}
107109

108110
@Override

paimon-core/src/main/java/org/apache/paimon/KeyValueFileStore.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,9 @@ private Map<String, FileStorePathFactory> format2PathFactory() {
203203
options.path(),
204204
partitionType,
205205
options.partitionDefaultName(),
206-
format)));
206+
format,
207+
options.dataFilePrefix(),
208+
options.changelogFilePrefix())));
207209
return pathFactoryMap;
208210
}
209211

paimon-core/src/main/java/org/apache/paimon/io/DataFilePathFactory.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import org.apache.paimon.annotation.VisibleForTesting;
2222
import org.apache.paimon.fs.Path;
23+
import org.apache.paimon.utils.StringUtils;
2324

2425
import javax.annotation.concurrent.ThreadSafe;
2526

@@ -30,9 +31,9 @@
3031
@ThreadSafe
3132
public class DataFilePathFactory {
3233

33-
public static final String DATA_FILE_PREFIX = "data-";
34+
public static final String DEFAULT_DATA_FILE_PREFIX = "data-";
3435

35-
public static final String CHANGELOG_FILE_PREFIX = "changelog-";
36+
public static final String DEFAULT_CHANGELOG_FILE_PREFIX = "changelog-";
3637

3738
public static final String INDEX_PATH_SUFFIX = ".index";
3839

@@ -41,21 +42,35 @@ public class DataFilePathFactory {
4142

4243
private final AtomicInteger pathCount;
4344
private final String formatIdentifier;
44-
45-
public DataFilePathFactory(Path parent, String formatIdentifier) {
45+
private final String dataFilePrefix;
46+
private final String changelogFilePrefix;
47+
48+
public DataFilePathFactory(
49+
Path parent,
50+
String formatIdentifier,
51+
String dataFilePrefix,
52+
String changelogFilePrefix) {
4653
this.parent = parent;
4754
this.uuid = UUID.randomUUID().toString();
4855

4956
this.pathCount = new AtomicInteger(0);
5057
this.formatIdentifier = formatIdentifier;
58+
this.dataFilePrefix = dataFilePrefix;
59+
this.changelogFilePrefix = changelogFilePrefix;
5160
}
5261

5362
public Path newPath() {
54-
return newPath(DATA_FILE_PREFIX);
63+
if (!StringUtils.isBlank(dataFilePrefix)) {
64+
return newPath(dataFilePrefix);
65+
}
66+
return newPath(DEFAULT_DATA_FILE_PREFIX);
5567
}
5668

5769
public Path newChangelogPath() {
58-
return newPath(CHANGELOG_FILE_PREFIX);
70+
if (!StringUtils.isBlank(changelogFilePrefix)) {
71+
return newPath(changelogFilePrefix);
72+
}
73+
return newPath(DEFAULT_CHANGELOG_FILE_PREFIX);
5974
}
6075

6176
private Path newPath(String prefix) {

paimon-core/src/main/java/org/apache/paimon/operation/MergeFileSplitRead.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
import java.util.Set;
6161
import java.util.stream.Collectors;
6262

63-
import static org.apache.paimon.io.DataFilePathFactory.CHANGELOG_FILE_PREFIX;
63+
import static org.apache.paimon.io.DataFilePathFactory.DEFAULT_CHANGELOG_FILE_PREFIX;
6464
import static org.apache.paimon.predicate.PredicateBuilder.containsFields;
6565
import static org.apache.paimon.predicate.PredicateBuilder.splitAnd;
6666

@@ -314,7 +314,7 @@ public RecordReader<KeyValue> createNoMergeReader(
314314

315315
private Optional<String> changelogFile(DataFileMeta fileMeta) {
316316
for (String file : fileMeta.extraFiles()) {
317-
if (file.startsWith(CHANGELOG_FILE_PREFIX)) {
317+
if (file.startsWith(DEFAULT_CHANGELOG_FILE_PREFIX)) {
318318
return Optional.of(file);
319319
}
320320
}

paimon-core/src/main/java/org/apache/paimon/utils/FileStorePathFactory.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ public class FileStorePathFactory {
4141
private final String uuid;
4242
private final InternalRowPartitionComputer partitionComputer;
4343
private final String formatIdentifier;
44+
private final String dataFilePrefix;
45+
private final String changelogFilePrefix;
4446

4547
private final AtomicInteger manifestFileCount;
4648
private final AtomicInteger manifestListCount;
@@ -49,12 +51,19 @@ public class FileStorePathFactory {
4951
private final AtomicInteger statsFileCount;
5052

5153
public FileStorePathFactory(
52-
Path root, RowType partitionType, String defaultPartValue, String formatIdentifier) {
54+
Path root,
55+
RowType partitionType,
56+
String defaultPartValue,
57+
String formatIdentifier,
58+
String dataFilePrefix,
59+
String changelogFilePrefix) {
5360
this.root = root;
5461
this.uuid = UUID.randomUUID().toString();
5562

5663
this.partitionComputer = getPartitionComputer(partitionType, defaultPartValue);
5764
this.formatIdentifier = formatIdentifier;
65+
this.dataFilePrefix = dataFilePrefix;
66+
this.changelogFilePrefix = changelogFilePrefix;
5867

5968
this.manifestFileCount = new AtomicInteger(0);
6069
this.manifestListCount = new AtomicInteger(0);
@@ -97,7 +106,11 @@ public Path toManifestListPath(String manifestListName) {
97106
}
98107

99108
public DataFilePathFactory createDataFilePathFactory(BinaryRow partition, int bucket) {
100-
return new DataFilePathFactory(bucketPath(partition, bucket), formatIdentifier);
109+
return new DataFilePathFactory(
110+
bucketPath(partition, bucket),
111+
formatIdentifier,
112+
dataFilePrefix,
113+
changelogFilePrefix);
101114
}
102115

103116
public Path bucketPath(BinaryRow partition, int bucket) {

paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyWriterTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,9 @@ private InternalRow row(int id, String name, String dt) {
520520
private DataFilePathFactory createPathFactory() {
521521
return new DataFilePathFactory(
522522
new Path(tempDir + "/dt=" + PART + "/bucket-0"),
523-
CoreOptions.FILE_FORMAT.defaultValue().toString());
523+
CoreOptions.FILE_FORMAT.defaultValue().toString(),
524+
CoreOptions.DATA_FILE_PREFIX.defaultValue(),
525+
CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue());
524526
}
525527

526528
private AppendOnlyWriter createEmptyWriter(long targetFileSize) {

paimon-core/src/test/java/org/apache/paimon/format/FileFormatSuffixTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ public void testFileSuffix(@TempDir java.nio.file.Path tempDir) throws Exception
6666
assertThat(path.toString().endsWith(format)).isTrue();
6767

6868
DataFilePathFactory dataFilePathFactory =
69-
new DataFilePathFactory(new Path(tempDir + "/dt=1/bucket-1"), format);
69+
new DataFilePathFactory(
70+
new Path(tempDir + "/dt=1/bucket-1"),
71+
format,
72+
CoreOptions.DATA_FILE_PREFIX.defaultValue(),
73+
CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue());
7074
FileFormat fileFormat = FileFormat.fromIdentifier(format, new Options());
7175
LinkedList<DataFileMeta> toCompact = new LinkedList<>();
7276
CoreOptions options = new CoreOptions(new HashMap<>());

paimon-core/src/test/java/org/apache/paimon/io/DataFilePathFactoryTest.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ public void testNoPartition() {
3636
DataFilePathFactory pathFactory =
3737
new DataFilePathFactory(
3838
new Path(tempDir + "/bucket-123"),
39-
CoreOptions.FILE_FORMAT.defaultValue().toString());
39+
CoreOptions.FILE_FORMAT.defaultValue().toString(),
40+
CoreOptions.DATA_FILE_PREFIX.defaultValue(),
41+
CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue());
4042
String uuid = pathFactory.uuid();
4143

4244
for (int i = 0; i < 20; i++) {
@@ -60,7 +62,9 @@ public void testWithPartition() {
6062
DataFilePathFactory pathFactory =
6163
new DataFilePathFactory(
6264
new Path(tempDir + "/dt=20211224/bucket-123"),
63-
CoreOptions.FILE_FORMAT.defaultValue().toString());
65+
CoreOptions.FILE_FORMAT.defaultValue().toString(),
66+
CoreOptions.DATA_FILE_PREFIX.defaultValue(),
67+
CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue());
6468
String uuid = pathFactory.uuid();
6569

6670
for (int i = 0; i < 20; i++) {

0 commit comments

Comments
 (0)