@@ -102,166 +102,6 @@ auto basicROOTTypeFromArrow(arrow::Type::type id)
102102 }
103103}
104104
105- TBranch* BranchToColumn::branch ()
106- {
107- return mBranch ;
108- }
109-
110- BranchToColumn::BranchToColumn (TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool)
111- : mBranch {branch},
112- mVLA {VLA},
113- mColumnName {std::move (name)},
114- mType {type},
115- mArrowType {arrowTypeFromROOT (type, listSize)},
116- mListSize {listSize},
117- mPool {pool}
118-
119- {
120- if (mType == EDataType::kBool_t ) {
121- if (mListSize > 1 ) {
122- auto status = arrow::MakeBuilder (mPool , mArrowType ->field (0 )->type (), &mBuilder );
123- if (!status.ok ()) {
124- throw runtime_error (" Cannot create value builder" );
125- }
126- mListBuilder = std::make_unique<arrow::FixedSizeListBuilder>(mPool , std::move (mBuilder ), mListSize );
127- mValueBuilder = static_cast <arrow::FixedSizeListBuilder*>(mListBuilder .get ())->value_builder ();
128- } else {
129- auto status = arrow::MakeBuilder (mPool , mArrowType , &mBuilder );
130- if (!status.ok ()) {
131- throw runtime_error (" Cannot create builder" );
132- }
133- mValueBuilder = mBuilder .get ();
134- }
135- }
136- }
137-
138- std::pair<std::shared_ptr<arrow::ChunkedArray>, std::shared_ptr<arrow::Field>> BranchToColumn::read (TBuffer* buffer)
139- {
140- O2_SIGNPOST_ID_FROM_POINTER (sid, tabletree_helpers, buffer);
141- auto totalEntries = mBranch ->GetEntries ();
142- arrow::Status status;
143- int readEntries = 0 ;
144- buffer->Reset ();
145- std::shared_ptr<arrow::Array> array;
146-
147- if (mType == EDataType::kBool_t ) {
148- // boolean array special case: we need to use builder to create the bitmap
149- status = mValueBuilder ->Reserve (totalEntries * mListSize );
150- if (mListSize > 1 ) {
151- status &= mListBuilder ->Reserve (totalEntries);
152- }
153- if (!status.ok ()) {
154- throw runtime_error (" Failed to reserve memory for array builder" );
155- }
156- while (readEntries < totalEntries) {
157- auto readLast = mBranch ->GetBulkRead ().GetBulkEntries (readEntries, *buffer);
158- readEntries += readLast;
159- status &= static_cast <arrow::BooleanBuilder*>(mValueBuilder )->AppendValues (reinterpret_cast <uint8_t const *>(buffer->GetCurrent ()), readLast * mListSize );
160- }
161- if (mListSize > 1 ) {
162- status &= static_cast <arrow::FixedSizeListBuilder*>(mListBuilder .get ())->AppendValues (readEntries);
163- }
164- if (!status.ok ()) {
165- throw runtime_error (" Failed to append values to array" );
166- }
167- if (mListSize > 1 ) {
168- status &= mListBuilder ->Finish (&array);
169- } else {
170- status &= mValueBuilder ->Finish (&array);
171- }
172- if (!status.ok ()) {
173- throw runtime_error (" Failed to create array" );
174- }
175- } else {
176- // other types: use serialized read to build arrays directly
177- size_t branchSize = mBranch ->GetTotBytes ();
178- auto && result = arrow::AllocateResizableBuffer (mBranch ->GetTotBytes (), mPool );
179- O2_SIGNPOST_EVENT_EMIT (tabletree_helpers, sid, " BranchToColumn" , " Allocating %ld bytes for %{public}s" , branchSize, mBranch ->GetName ());
180- if (!result.ok ()) {
181- throw runtime_error (" Cannot allocate values buffer" );
182- }
183- std::shared_ptr<arrow::Buffer> arrowValuesBuffer = std::move (result).ValueUnsafe ();
184- auto ptr = arrowValuesBuffer->mutable_data ();
185- if (ptr == nullptr ) {
186- throw runtime_error (" Invalid buffer" );
187- }
188-
189- auto typeSize = TDataType::GetDataType (mType )->Size ();
190- std::unique_ptr<TBufferFile> offsetBuffer = nullptr ;
191-
192- uint32_t offset = 0 ;
193- int count = 0 ;
194- std::shared_ptr<arrow::Buffer> arrowOffsetBuffer;
195- gsl::span<int > offsets;
196- int size = 0 ;
197- uint32_t totalSize = 0 ;
198- TBranch* mSizeBranch = nullptr ;
199- if (mVLA ) {
200- mSizeBranch = mBranch ->GetTree ()->GetBranch ((std::string{mBranch ->GetName ()} + TableTreeHelpers::sizeBranchSuffix).c_str ());
201- offsetBuffer = std::make_unique<TBufferFile>(TBuffer::EMode::kWrite , 4 * 1024 * 1024 );
202- result = arrow::AllocateResizableBuffer ((totalEntries + 1 ) * (int64_t )sizeof (int ), mPool );
203- if (!result.ok ()) {
204- throw runtime_error (" Cannot allocate offset buffer" );
205- }
206- arrowOffsetBuffer = std::move (result).ValueUnsafe ();
207- unsigned char * ptrOffset = arrowOffsetBuffer->mutable_data ();
208- auto * tPtrOffset = reinterpret_cast <int *>(ptrOffset);
209- offsets = gsl::span<int >{tPtrOffset, tPtrOffset + totalEntries + 1 };
210-
211- // read sizes first
212- while (readEntries < totalEntries) {
213- auto readLast = mSizeBranch ->GetBulkRead ().GetEntriesSerialized (readEntries, *offsetBuffer);
214- readEntries += readLast;
215- for (auto i = 0 ; i < readLast; ++i) {
216- offsets[count++] = (int )offset;
217- offset += swap32_ (reinterpret_cast <uint32_t *>(offsetBuffer->GetCurrent ())[i]);
218- }
219- }
220- offsets[count] = (int )offset;
221- totalSize = offset;
222- readEntries = 0 ;
223- }
224-
225- while (readEntries < totalEntries) {
226- auto readLast = mBranch ->GetBulkRead ().GetEntriesSerialized (readEntries, *buffer);
227- if (mVLA ) {
228- size = offsets[readEntries + readLast] - offsets[readEntries];
229- } else {
230- size = readLast * mListSize ;
231- }
232- readEntries += readLast;
233- swapCopy (ptr, buffer->GetCurrent (), size, typeSize);
234- ptr += (ptrdiff_t )(size * typeSize);
235- }
236- if (!mVLA ) {
237- totalSize = readEntries * mListSize ;
238- }
239- std::shared_ptr<arrow::PrimitiveArray> varray;
240- switch (mListSize ) {
241- case -1 :
242- varray = std::make_shared<arrow::PrimitiveArray>(mArrowType ->field (0 )->type (), totalSize, arrowValuesBuffer);
243- array = std::make_shared<arrow::ListArray>(mArrowType , readEntries, arrowOffsetBuffer, varray);
244- break ;
245- case 1 :
246- array = std::make_shared<arrow::PrimitiveArray>(mArrowType , readEntries, arrowValuesBuffer);
247- break ;
248- default :
249- varray = std::make_shared<arrow::PrimitiveArray>(mArrowType ->field (0 )->type (), totalSize, arrowValuesBuffer);
250- array = std::make_shared<arrow::FixedSizeListArray>(mArrowType , readEntries, varray);
251- }
252- }
253-
254- auto fullArray = std::make_shared<arrow::ChunkedArray>(array);
255- auto field = std::make_shared<arrow::Field>(mBranch ->GetName (), mArrowType );
256-
257- mBranch ->SetStatus (false );
258- mBranch ->DropBaskets (" all" );
259- mBranch ->Reset ();
260- mBranch ->GetTransientBuffer (0 )->Expand (0 );
261-
262- return std::make_pair (fullArray, field);
263- }
264-
265105ColumnToBranch::ColumnToBranch (TTree* tree, std::shared_ptr<arrow::ChunkedArray> const & column, std::shared_ptr<arrow::Field> const & field)
266106 : mBranchName {field->name ()},
267107 mColumn {column.get ()},
0 commit comments