Skip to content

Commit 1b8fe5d

Browse files
committed
[VL] Adding configurations on max write file size
Signed-off-by: Yuan <yuanzhou@apache.org>
1 parent ae2fc5b commit 1b8fe5d

File tree

4 files changed

+14
-0
lines changed

4 files changed

+14
-0
lines changed

backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,16 @@ object VeloxConfig extends ConfigRegistry {
274274
.checkValue(_ > 0, "must be a positive number")
275275
.createWithDefault(10000)
276276

277+
val MAX_TARGET_FILE_SIZE_SESSION =
278+
buildConf("spark.gluten.sql.columnar.backend.velox.maxTargetFileSizeSession")
279+
.doc(
280+
"The target file size for each output file when writing data. " +
281+
"0 means no limit on target file size, and the actual file size will be determined by " +
282+
"other factors such as max partition number and shuffle batch size.")
283+
.bytesConf(ByteUnit.BYTE)
284+
.checkValue(_ >= 0, "must be a non-negative number")
285+
.createWithDefault(0)
286+
277287
val COLUMNAR_VELOX_RESIZE_BATCHES_SHUFFLE_INPUT =
278288
buildConf("spark.gluten.sql.columnar.backend.velox.resizeBatches.shuffleInput")
279289
.doc(

cpp/velox/config/VeloxConfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ const std::string kParquetUseColumnNames = "spark.gluten.sql.columnar.backend.ve
160160

161161
// write files
162162
const std::string kMaxPartitions = "spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession";
163+
const std::string KMaxTargetFileSize = "spark.gluten.sql.columnar.backend.velox.maxTargetFileSizeSession";
163164

164165
const std::string kGlogVerboseLevel = "spark.gluten.sql.columnar.backend.velox.glogVerboseLevel";
165166
const uint32_t kGlogVerboseLevelDefault = 0;

cpp/velox/utils/ConfigExtractor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,8 @@ std::shared_ptr<facebook::velox::config::ConfigBase> createHiveConnectorSessionC
231231
configs[facebook::velox::connector::hive::HiveConfig::kReadTimestampUnitSession] = std::string("6");
232232
configs[facebook::velox::connector::hive::HiveConfig::kMaxPartitionsPerWritersSession] =
233233
conf->get<std::string>(kMaxPartitions, "10000");
234+
configs[facebook::velox::connector::hive::HiveConfig::kMaxTargetFileSizeSession] =
235+
conf->get<std::string>(KMaxTargetFileSize, "0B"); // 0 means no limit on target file size
234236
configs[facebook::velox::connector::hive::HiveConfig::kIgnoreMissingFilesSession] =
235237
conf->get<bool>(kIgnoreMissingFiles, false) ? "true" : "false";
236238
configs[facebook::velox::connector::hive::HiveConfig::kParquetUseColumnNamesSession] =

docs/velox-configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ nav_order: 16
4848
| spark.gluten.sql.columnar.backend.velox.maxSpillFileSize | 1GB | The maximum size of a single spill file created |
4949
| spark.gluten.sql.columnar.backend.velox.maxSpillLevel | 4 | The max allowed spilling level with zero being the initial spilling level |
5050
| spark.gluten.sql.columnar.backend.velox.maxSpillRunRows | 3M | The maximum row size of a single spill run |
51+
| spark.gluten.sql.columnar.backend.velox.maxTargetFileSizeSession | 0b | The target file size for each output file when writing data. 0 means no limit on target file size, and the actual file size will be determined by other factors such as max partition number and shuffle batch size. |
5152
| spark.gluten.sql.columnar.backend.velox.memCacheSize | 1GB | The memory cache size |
5253
| spark.gluten.sql.columnar.backend.velox.memInitCapacity | 8MB | The initial memory capacity to reserve for a newly created Velox query memory pool. |
5354
| spark.gluten.sql.columnar.backend.velox.memoryPoolCapacityTransferAcrossTasks | true | Whether to allow memory capacity transfer between memory pools from different tasks. |

0 commit comments

Comments
 (0)