Skip to content

Commit 7479a82

Browse files
feat: Optimize UnsafeRow to RowVector (facebookincubator#12841)
Summary: A follow up of facebookincubator#11936 The above PR which has been reverted introduces a core dump when the schema is `Row(MAP(INTEGER(), ROW({ARRAY(INTEGER()), INTEGER()})))` and the nested row is null, we should check the null buffer first, and then slice the ARRAY's buffer. Pull Request resolved: facebookincubator#12841 Reviewed By: pedroerp Differential Revision: D72997008 Pulled By: kKPulla fbshipit-source-id: d187393217792bd70a2970569a8075890960071a
1 parent 130a4fc commit 7479a82

File tree

8 files changed

+810
-1056
lines changed

8 files changed

+810
-1056
lines changed

velox/row/UnsafeRowDeserializers.h

Lines changed: 11 additions & 807 deletions
Large diffs are not rendered by default.

velox/row/UnsafeRowFast.cpp

Lines changed: 692 additions & 0 deletions
Large diffs are not rendered by default.

velox/row/UnsafeRowFast.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ class UnsafeRowFast {
4343
/// 'buffer' must have sufficient capacity and set to all zeros.
4444
int32_t serialize(vector_size_t index, char* buffer) const;
4545

46+
/// Deserializes multiple rows into a RowVector of specified type. The type
47+
/// must match the contents of the serialized rows.
48+
/// @param data The start memory address of each row.
49+
static RowVectorPtr deserialize(
50+
const std::vector<char*>& data,
51+
const RowTypePtr& rowType,
52+
memory::MemoryPool* pool);
53+
4654
protected:
4755
explicit UnsafeRowFast(const VectorPtr& vector);
4856

velox/row/benchmarks/DynamicRowVectorDeserializeBenchmark.cpp

Lines changed: 0 additions & 180 deletions
This file was deleted.

velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "velox/common/memory/HashStringAllocator.h"
2020
#include "velox/exec/ContainerRowSerde.h"
2121
#include "velox/row/CompactRow.h"
22-
#include "velox/row/UnsafeRowDeserializers.h"
2322
#include "velox/row/UnsafeRowFast.h"
2423
#include "velox/vector/fuzzer/VectorFuzzer.h"
2524

@@ -49,7 +48,7 @@ class SerializeBenchmark {
4948
auto serialized = serialize(fast, data->size(), buffer);
5049
suspender.dismiss();
5150

52-
auto copy = UnsafeRowDeserializer::deserialize(serialized, rowType, pool());
51+
auto copy = UnsafeRowFast::deserialize(serialized, rowType, pool());
5352
VELOX_CHECK_EQ(copy->size(), data->size());
5453
}
5554

@@ -145,17 +144,17 @@ class SerializeBenchmark {
145144
return totalSize;
146145
}
147146

148-
std::vector<std::optional<std::string_view>> serialize(
147+
std::vector<char*> serialize(
149148
UnsafeRowFast& unsafeRow,
150149
vector_size_t numRows,
151150
BufferPtr& buffer) {
152-
std::vector<std::optional<std::string_view>> serialized;
151+
std::vector<char*> serialized;
153152
auto rawBuffer = buffer->asMutable<char>();
154153

155154
size_t offset = 0;
156155
for (auto i = 0; i < numRows; ++i) {
157156
auto rowSize = unsafeRow.serialize(i, rawBuffer + offset);
158-
serialized.push_back(std::string_view(rawBuffer + offset, rowSize));
157+
serialized.push_back(rawBuffer + offset);
159158
offset += rowSize;
160159
}
161160

velox/row/tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
add_executable(velox_row_test CompactRowTest.cpp UnsafeRowFuzzTest.cpp)
15+
add_executable(velox_row_test CompactRowTest.cpp UnsafeRowTest.cpp)
1616

1717
add_test(velox_row_test velox_row_test)
1818

0 commit comments

Comments
 (0)