@@ -93,7 +93,7 @@ Y_UNIT_TEST_SUITE(TestArrowBlockSplitter) {
9393 const ui64 totalSize = GetBatchDataSize (batch);
9494 constexpr ui64 numberParts = 8 ;
9595
96- TArrowBlockSplitter splitter (totalSize / numberParts, 0 , 0 );
96+ TArrowBlockSplitter splitter (totalSize / numberParts, 0 );
9797 std::vector<std::shared_ptr<arrow::RecordBatch>> splttedBatches;
9898 splitter.SplitRecordBatch (batch, 0 , splttedBatches);
9999 ValidateSplit (batch, numberParts, splttedBatches);
@@ -107,35 +107,21 @@ Y_UNIT_TEST_SUITE(TestArrowBlockSplitter) {
107107 const ui64 totalSize = GetBatchDataSize (batch) + rowOverhead * batch->num_rows ();
108108 constexpr ui64 numberParts = 8 ;
109109
110- TArrowBlockSplitter splitter (totalSize / numberParts, rowOverhead, 0 );
110+ TArrowBlockSplitter splitter (totalSize / numberParts, rowOverhead);
111111 std::vector<std::shared_ptr<arrow::RecordBatch>> splttedBatches;
112112 splitter.SplitRecordBatch (batch, 0 , splttedBatches);
113113 ValidateSplit (batch, numberParts, splttedBatches);
114114 }
115115
116- Y_UNIT_TEST (SplitByMetaSize) {
117- NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TIntSeqFiller<arrow::Int64Type>>>(" field" );
118- std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor ({ column }).BuildBatch (2048 );
119-
120- const ui64 totalSize = GetBatchDataSize (batch);
121- const ui64 batchOverhead = totalSize / 2 ;
122-
123- TArrowBlockSplitter splitter (totalSize, 0 , batchOverhead);
124- std::vector<std::shared_ptr<arrow::RecordBatch>> splttedBatches;
125- splitter.SplitRecordBatch (batch, 0 , splttedBatches);
126- ValidateSplit (batch, 2 , splttedBatches);
127- }
128-
129116 Y_UNIT_TEST (PassSmallBlock) {
130117 NConstruction::IArrayBuilder::TPtr column = std::make_shared<NConstruction::TSimpleArrayConstructor<NConstruction::TStringPoolFiller>>(
131118 " field" , NConstruction::TStringPoolFiller (8 , 512 ));
132119 std::shared_ptr<arrow::RecordBatch> batch = NConstruction::TRecordBatchConstructor ({ column }).BuildBatch (2048 );
133120
134121 constexpr ui64 rowOverhead = sizeof (ui64);
135122 const ui64 totalSize = GetBatchDataSize (batch) + rowOverhead * batch->num_rows ();
136- const ui64 batchOverhead = 1_MB;
137123
138- TArrowBlockSplitter splitter (totalSize + batchOverhead , rowOverhead, batchOverhead );
124+ TArrowBlockSplitter splitter (totalSize, rowOverhead);
139125 std::vector<std::shared_ptr<arrow::RecordBatch>> splttedBatches;
140126 splitter.SplitRecordBatch (batch, 0 , splttedBatches);
141127 ValidateSplit (batch, 1 , splttedBatches);
@@ -150,7 +136,7 @@ Y_UNIT_TEST_SUITE(TestArrowBlockSplitter) {
150136 const ui64 totalSize = GetBatchDataSize (batch);
151137
152138 constexpr ui64 rowId = 42 ;
153- TArrowBlockSplitter splitter (strSize / 2 , 0 , 0 );
139+ TArrowBlockSplitter splitter (strSize / 2 , 0 );
154140 std::vector<std::shared_ptr<arrow::RecordBatch>> splttedBatches;
155141 UNIT_ASSERT_EXCEPTION_CONTAINS (splitter.SplitRecordBatch (batch, rowId, splttedBatches), parquet::ParquetException, TStringBuilder () << " Row " << rowId + 1 << " size is " << totalSize << " , that is larger than allowed limit " << strSize / 2 );
156142 }
0 commit comments