Skip to content

Commit 5c27b6f

Browse files
committed
shuffle
1 parent f72ef75 commit 5c27b6f

File tree

4 files changed

+237
-181
lines changed

4 files changed

+237
-181
lines changed

r/src/vctr_builder.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@
2929

3030
#include "vctr_builder.h"
3131
#include "vctr_builder_base.h"
32+
#include "vctr_builder_int.h"
3233
#include "vctr_builder_list_of.h"
3334
#include "vctr_builder_primitive.h"
3435
#include "vctr_builder_rcrd.h"
36+
#include "vctr_builder_unspecified.h"
3537

3638
// These conversions are the default R-native type guesses for
3739
// an array that don't require extra information from the ptype (e.g.,

r/src/vctr_builder_int.h

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#ifndef R_NANOARROW_VCTR_BUILDER_INT_H_INCLUDED
19+
#define R_NANOARROW_VCTR_BUILDER_INT_H_INCLUDED
20+
21+
#define R_NO_REMAP
22+
#include <R.h>
23+
#include <Rinternals.h>
24+
25+
#include "vctr_builder_base.h"
26+
27+
class IntBuilder : public VctrBuilder {
28+
public:
29+
explicit IntBuilder(SEXP ptype_sexp) : VctrBuilder(VECTOR_TYPE_INT, ptype_sexp) {}
30+
31+
ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options,
32+
ArrowError* error) override {
33+
NANOARROW_RETURN_NOT_OK(VctrBuilder::Init(schema, options, error));
34+
return NANOARROW_OK;
35+
}
36+
37+
SEXP GetPtype() override { return Rf_allocVector(INTSXP, 0); }
38+
39+
ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override {
40+
NANOARROW_RETURN_NOT_OK(VctrBuilder::Reserve(n, error));
41+
SEXP value = PROTECT(Rf_allocVector(INTSXP, n));
42+
SetValue(value);
43+
UNPROTECT(1);
44+
return NANOARROW_OK;
45+
}
46+
47+
ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override {
48+
NANOARROW_RETURN_NOT_OK(VctrBuilder::PushNext(array, error));
49+
50+
int* result = INTEGER(value_);
51+
int64_t n_bad_values = 0;
52+
53+
// True for all the types supported here
54+
const uint8_t* is_valid = array_view_.buffer_views[0].data.as_uint8;
55+
int64_t raw_src_offset = array_view_.offset;
56+
R_xlen_t length = array->length;
57+
58+
// Fill the buffer
59+
switch (array_view_.storage_type) {
60+
case NANOARROW_TYPE_NA:
61+
for (R_xlen_t i = 0; i < length; i++) {
62+
result[value_size_ + i] = NA_INTEGER;
63+
}
64+
break;
65+
case NANOARROW_TYPE_INT32:
66+
memcpy(result + value_size_,
67+
array_view_.buffer_views[1].data.as_int32 + raw_src_offset,
68+
length * sizeof(int32_t));
69+
70+
// Set any nulls to NA_INTEGER
71+
if (is_valid != NULL && array_view_.null_count != 0) {
72+
for (R_xlen_t i = 0; i < length; i++) {
73+
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
74+
result[value_size_ + i] = NA_INTEGER;
75+
}
76+
}
77+
}
78+
break;
79+
case NANOARROW_TYPE_BOOL:
80+
ArrowBitsUnpackInt32(array_view_.buffer_views[1].data.as_uint8 + raw_src_offset,
81+
raw_src_offset, length, result + value_size_);
82+
83+
// Set any nulls to NA_LOGICAL
84+
if (is_valid != NULL && array_view_.null_count != 0) {
85+
for (R_xlen_t i = 0; i < length; i++) {
86+
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
87+
result[value_size_ + i] = NA_LOGICAL;
88+
}
89+
}
90+
}
91+
break;
92+
case NANOARROW_TYPE_INT8:
93+
case NANOARROW_TYPE_UINT8:
94+
case NANOARROW_TYPE_INT16:
95+
case NANOARROW_TYPE_UINT16:
96+
// No need to bounds check for these types
97+
for (R_xlen_t i = 0; i < length; i++) {
98+
result[value_size_ + i] = (int32_t)ArrowArrayViewGetIntUnsafe(&array_view_, i);
99+
}
100+
101+
// Set any nulls to NA_INTEGER
102+
if (is_valid != NULL && array_view_.null_count != 0) {
103+
for (R_xlen_t i = 0; i < length; i++) {
104+
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
105+
result[value_size_ + i] = NA_INTEGER;
106+
}
107+
}
108+
}
109+
break;
110+
case NANOARROW_TYPE_UINT32:
111+
case NANOARROW_TYPE_INT64:
112+
case NANOARROW_TYPE_UINT64:
113+
case NANOARROW_TYPE_FLOAT:
114+
case NANOARROW_TYPE_DOUBLE:
115+
// Loop + bounds check. Because we don't know what memory might be
116+
// in a null slot, we have to check nulls if there are any.
117+
if (is_valid != NULL && array_view_.null_count != 0) {
118+
for (R_xlen_t i = 0; i < length; i++) {
119+
if (ArrowBitGet(is_valid, raw_src_offset + i)) {
120+
int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i);
121+
if (value > INT_MAX || value <= NA_INTEGER) {
122+
result[value_size_ + i] = NA_INTEGER;
123+
n_bad_values++;
124+
} else {
125+
result[value_size_ + i] = (int32_t)value;
126+
}
127+
} else {
128+
result[value_size_ + i] = NA_INTEGER;
129+
}
130+
}
131+
} else {
132+
for (R_xlen_t i = 0; i < length; i++) {
133+
int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i);
134+
if (value > INT_MAX || value <= NA_INTEGER) {
135+
result[value_size_ + i] = NA_INTEGER;
136+
n_bad_values++;
137+
} else {
138+
result[value_size_ + i] = (int32_t)value;
139+
}
140+
}
141+
}
142+
break;
143+
144+
default:
145+
return EINVAL;
146+
}
147+
148+
if (n_bad_values > 0) {
149+
WarnLossyConvert("outside integer range set to NA", n_bad_values);
150+
}
151+
152+
return NANOARROW_OK;
153+
}
154+
};
155+
156+
#endif

r/src/vctr_builder_primitive.h

Lines changed: 0 additions & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -24,187 +24,6 @@
2424

2525
#include "vctr_builder_base.h"
2626

27-
class UnspecifiedBuilder : public VctrBuilder {
28-
public:
29-
explicit UnspecifiedBuilder(SEXP ptype_sexp)
30-
: VctrBuilder(VECTOR_TYPE_UNSPECIFIED, ptype_sexp) {}
31-
32-
ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options,
33-
ArrowError* error) override {
34-
NANOARROW_RETURN_NOT_OK(VctrBuilder::Init(schema, options, error));
35-
switch (schema_view_.type) {
36-
case NANOARROW_TYPE_DICTIONARY:
37-
StopCantConvert();
38-
default:
39-
break;
40-
}
41-
42-
return NANOARROW_OK;
43-
}
44-
45-
ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override {
46-
NANOARROW_RETURN_NOT_OK(VctrBuilder::Reserve(n, error));
47-
SEXP value = PROTECT(Rf_allocVector(LGLSXP, n));
48-
SetValue(value);
49-
UNPROTECT(1);
50-
return NANOARROW_OK;
51-
}
52-
53-
ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override {
54-
int64_t not_null_count;
55-
if (array->null_count == -1 && array->buffers[0] == nullptr) {
56-
not_null_count = array->length;
57-
} else if (array->null_count == -1) {
58-
not_null_count =
59-
ArrowBitCountSet(reinterpret_cast<const uint8_t*>(array->buffers[0]),
60-
array->offset, array->length);
61-
} else {
62-
not_null_count = array->length - array->null_count;
63-
}
64-
65-
if (not_null_count > 0 && array->length > 0) {
66-
NANOARROW_RETURN_NOT_OK(
67-
WarnLossyConvert("that were non-null set to NA", not_null_count));
68-
}
69-
70-
int* value_ptr = LOGICAL(value_) + value_size_;
71-
for (int64_t i = 0; i < array->length; i++) {
72-
value_ptr[i] = NA_LOGICAL;
73-
}
74-
75-
return NANOARROW_OK;
76-
}
77-
};
78-
79-
class IntBuilder : public VctrBuilder {
80-
public:
81-
explicit IntBuilder(SEXP ptype_sexp) : VctrBuilder(VECTOR_TYPE_INT, ptype_sexp) {}
82-
83-
ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options,
84-
ArrowError* error) override {
85-
NANOARROW_RETURN_NOT_OK(VctrBuilder::Init(schema, options, error));
86-
return NANOARROW_OK;
87-
}
88-
89-
ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override {
90-
NANOARROW_RETURN_NOT_OK(VctrBuilder::Reserve(n, error));
91-
SEXP value = PROTECT(Rf_allocVector(INTSXP, n));
92-
SetValue(value);
93-
UNPROTECT(1);
94-
return NANOARROW_OK;
95-
}
96-
97-
ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override {
98-
NANOARROW_RETURN_NOT_OK(VctrBuilder::PushNext(array, error));
99-
100-
int* result = INTEGER(value_);
101-
int64_t n_bad_values = 0;
102-
103-
// True for all the types supported here
104-
const uint8_t* is_valid = array_view_.buffer_views[0].data.as_uint8;
105-
int64_t raw_src_offset = array_view_.offset;
106-
R_xlen_t length = array->length;
107-
108-
// Fill the buffer
109-
switch (array_view_.storage_type) {
110-
case NANOARROW_TYPE_NA:
111-
for (R_xlen_t i = 0; i < length; i++) {
112-
result[value_size_ + i] = NA_INTEGER;
113-
}
114-
break;
115-
case NANOARROW_TYPE_INT32:
116-
memcpy(result + value_size_,
117-
array_view_.buffer_views[1].data.as_int32 + raw_src_offset,
118-
length * sizeof(int32_t));
119-
120-
// Set any nulls to NA_INTEGER
121-
if (is_valid != NULL && array_view_.null_count != 0) {
122-
for (R_xlen_t i = 0; i < length; i++) {
123-
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
124-
result[value_size_ + i] = NA_INTEGER;
125-
}
126-
}
127-
}
128-
break;
129-
case NANOARROW_TYPE_BOOL:
130-
ArrowBitsUnpackInt32(array_view_.buffer_views[1].data.as_uint8 + raw_src_offset,
131-
raw_src_offset, length, result + value_size_);
132-
133-
// Set any nulls to NA_LOGICAL
134-
if (is_valid != NULL && array_view_.null_count != 0) {
135-
for (R_xlen_t i = 0; i < length; i++) {
136-
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
137-
result[value_size_ + i] = NA_LOGICAL;
138-
}
139-
}
140-
}
141-
break;
142-
case NANOARROW_TYPE_INT8:
143-
case NANOARROW_TYPE_UINT8:
144-
case NANOARROW_TYPE_INT16:
145-
case NANOARROW_TYPE_UINT16:
146-
// No need to bounds check for these types
147-
for (R_xlen_t i = 0; i < length; i++) {
148-
result[value_size_ + i] = (int32_t)ArrowArrayViewGetIntUnsafe(&array_view_, i);
149-
}
150-
151-
// Set any nulls to NA_INTEGER
152-
if (is_valid != NULL && array_view_.null_count != 0) {
153-
for (R_xlen_t i = 0; i < length; i++) {
154-
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
155-
result[value_size_ + i] = NA_INTEGER;
156-
}
157-
}
158-
}
159-
break;
160-
case NANOARROW_TYPE_UINT32:
161-
case NANOARROW_TYPE_INT64:
162-
case NANOARROW_TYPE_UINT64:
163-
case NANOARROW_TYPE_FLOAT:
164-
case NANOARROW_TYPE_DOUBLE:
165-
// Loop + bounds check. Because we don't know what memory might be
166-
// in a null slot, we have to check nulls if there are any.
167-
if (is_valid != NULL && array_view_.null_count != 0) {
168-
for (R_xlen_t i = 0; i < length; i++) {
169-
if (ArrowBitGet(is_valid, raw_src_offset + i)) {
170-
int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i);
171-
if (value > INT_MAX || value <= NA_INTEGER) {
172-
result[value_size_ + i] = NA_INTEGER;
173-
n_bad_values++;
174-
} else {
175-
result[value_size_ + i] = (int32_t)value;
176-
}
177-
} else {
178-
result[value_size_ + i] = NA_INTEGER;
179-
}
180-
}
181-
} else {
182-
for (R_xlen_t i = 0; i < length; i++) {
183-
int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i);
184-
if (value > INT_MAX || value <= NA_INTEGER) {
185-
result[value_size_ + i] = NA_INTEGER;
186-
n_bad_values++;
187-
} else {
188-
result[value_size_ + i] = (int32_t)value;
189-
}
190-
}
191-
}
192-
break;
193-
194-
default:
195-
return EINVAL;
196-
}
197-
198-
if (n_bad_values > 0) {
199-
warn_lossy_conversion(n_bad_values, "outside integer range set to NA");
200-
}
201-
202-
return NANOARROW_OK;
203-
}
204-
205-
SEXP GetPtype() override { return Rf_allocVector(INTSXP, 0); }
206-
};
207-
20827
class DblBuilder : public VctrBuilder {
20928
public:
21029
explicit DblBuilder(SEXP ptype_sexp) : VctrBuilder(VECTOR_TYPE_DBL, ptype_sexp) {}

0 commit comments

Comments
 (0)